From d7f1706150ac751e30f8b64ec951e6ece2dcf404 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii Date: Sat, 2 May 2026 13:40:29 +0900 Subject: [PATCH v47 8/9] Row pattern recognition patch (tests: expected). --- src/test/regress/expected/rpr.out | 3740 ++++++++++ src/test/regress/expected/rpr_base.out | 6589 +++++++++++++++++ src/test/regress/expected/rpr_explain.out | 4885 ++++++++++++ src/test/regress/expected/rpr_integration.out | 1518 ++++ src/test/regress/expected/rpr_nfa.out | 4661 ++++++++++++ 5 files changed, 21393 insertions(+) create mode 100644 src/test/regress/expected/rpr.out create mode 100644 src/test/regress/expected/rpr_base.out create mode 100644 src/test/regress/expected/rpr_explain.out create mode 100644 src/test/regress/expected/rpr_integration.out create mode 100644 src/test/regress/expected/rpr_nfa.out diff --git a/src/test/regress/expected/rpr.out b/src/test/regress/expected/rpr.out new file mode 100644 index 00000000000..85384f6b096 --- /dev/null +++ b/src/test/regress/expected/rpr.out @@ -0,0 +1,3740 @@ +-- +-- Test for row pattern recognition: WINDOW clause integration and +-- scenario tests using synthetic stock data. 
+-- +-- Parser/planner tests: rpr_base.sql +-- NFA engine tests: rpr_nfa.sql +-- EXPLAIN statistics tests: rpr_explain.sql +-- +\getenv abs_srcdir PG_ABS_SRCDIR +-- Synthetic stock data for RPR pattern matching tests +CREATE TABLE rpr_stock ( + part_id integer, + rn integer, + price numeric(10,3), + volume bigint, + open numeric(10,3), + low numeric(10,3), + high numeric(10,3) +); +\set filename :abs_srcdir '/data/stock.data' +COPY rpr_stock FROM :'filename'; +ANALYZE rpr_stock; +CREATE TEMP TABLE stock ( + company TEXT, + tdate DATE, + price INTEGER +); +INSERT INTO stock VALUES ('company1', '2023-07-01', 100); +INSERT INTO stock VALUES ('company1', '2023-07-02', 200); +INSERT INTO stock VALUES ('company1', '2023-07-03', 150); +INSERT INTO stock VALUES ('company1', '2023-07-04', 140); +INSERT INTO stock VALUES ('company1', '2023-07-05', 150); +INSERT INTO stock VALUES ('company1', '2023-07-06', 90); +INSERT INTO stock VALUES ('company1', '2023-07-07', 110); +INSERT INTO stock VALUES ('company1', '2023-07-08', 130); +INSERT INTO stock VALUES ('company1', '2023-07-09', 120); +INSERT INTO stock VALUES ('company1', '2023-07-10', 130); +INSERT INTO stock VALUES ('company2', '2023-07-01', 50); +INSERT INTO stock VALUES ('company2', '2023-07-02', 2000); +INSERT INTO stock VALUES ('company2', '2023-07-03', 1500); +INSERT INTO stock VALUES ('company2', '2023-07-04', 1400); +INSERT INTO stock VALUES ('company2', '2023-07-05', 1500); +INSERT INTO stock VALUES ('company2', '2023-07-06', 60); +INSERT INTO stock VALUES ('company2', '2023-07-07', 1100); +INSERT INTO stock VALUES ('company2', '2023-07-08', 1300); +INSERT INTO stock VALUES ('company2', '2023-07-09', 1200); +INSERT INTO stock VALUES ('company2', '2023-07-10', 1300); +SELECT * FROM stock; + company | tdate | price +----------+------------+------- + company1 | 07-01-2023 | 100 + company1 | 07-02-2023 | 200 + company1 | 07-03-2023 | 150 + company1 | 07-04-2023 | 140 + company1 | 07-05-2023 | 150 + company1 | 
07-06-2023 | 90 + company1 | 07-07-2023 | 110 + company1 | 07-08-2023 | 130 + company1 | 07-09-2023 | 120 + company1 | 07-10-2023 | 130 + company2 | 07-01-2023 | 50 + company2 | 07-02-2023 | 2000 + company2 | 07-03-2023 | 1500 + company2 | 07-04-2023 | 1400 + company2 | 07-05-2023 | 1500 + company2 | 07-06-2023 | 60 + company2 | 07-07-2023 | 1100 + company2 | 07-08-2023 | 1300 + company2 | 07-09-2023 | 1200 + company2 | 07-10-2023 | 1300 +(20 rows) + +-- +-- Basic pattern matching with PREV/NEXT +-- +-- basic test using PREV +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, + nth_value(tdate, 2) OVER w AS nth_second + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value | nth_second +----------+------------+-------+-------------+------------+------------ + company1 | 07-01-2023 | 100 | 100 | 140 | 07-02-2023 + company1 | 07-02-2023 | 200 | | | + company1 | 07-03-2023 | 150 | | | + company1 | 07-04-2023 | 140 | | | + company1 | 07-05-2023 | 150 | | | + company1 | 07-06-2023 | 90 | 90 | 120 | 07-07-2023 + company1 | 07-07-2023 | 110 | | | + company1 | 07-08-2023 | 130 | | | + company1 | 07-09-2023 | 120 | | | + company1 | 07-10-2023 | 130 | | | + company2 | 07-01-2023 | 50 | 50 | 1400 | 07-02-2023 + company2 | 07-02-2023 | 2000 | | | + company2 | 07-03-2023 | 1500 | | | + company2 | 07-04-2023 | 1400 | | | + company2 | 07-05-2023 | 1500 | | | + company2 | 07-06-2023 | 60 | 60 | 1200 | 07-07-2023 + company2 | 07-07-2023 | 1100 | | | + company2 | 07-08-2023 | 1300 | | | + company2 | 07-09-2023 | 1200 | | | + company2 | 07-10-2023 | 1300 | | | +(20 rows) + +-- basic test using PREV. 
UP appears twice +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, + nth_value(tdate, 2) OVER w AS nth_second + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+ UP+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value | nth_second +----------+------------+-------+-------------+------------+------------ + company1 | 07-01-2023 | 100 | 100 | 150 | 07-02-2023 + company1 | 07-02-2023 | 200 | | | + company1 | 07-03-2023 | 150 | | | + company1 | 07-04-2023 | 140 | | | + company1 | 07-05-2023 | 150 | | | + company1 | 07-06-2023 | 90 | 90 | 130 | 07-07-2023 + company1 | 07-07-2023 | 110 | | | + company1 | 07-08-2023 | 130 | | | + company1 | 07-09-2023 | 120 | | | + company1 | 07-10-2023 | 130 | | | + company2 | 07-01-2023 | 50 | 50 | 1500 | 07-02-2023 + company2 | 07-02-2023 | 2000 | | | + company2 | 07-03-2023 | 1500 | | | + company2 | 07-04-2023 | 1400 | | | + company2 | 07-05-2023 | 1500 | | | + company2 | 07-06-2023 | 60 | 60 | 1300 | 07-07-2023 + company2 | 07-07-2023 | 1100 | | | + company2 | 07-08-2023 | 1300 | | | + company2 | 07-09-2023 | 1200 | | | + company2 | 07-10-2023 | 1300 | | | +(20 rows) + +-- basic test using PREV. 
Use '*' +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, + nth_value(tdate, 2) OVER w AS nth_second + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP* DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value | nth_second +----------+------------+-------+-------------+------------+------------ + company1 | 07-01-2023 | 100 | 100 | 140 | 07-02-2023 + company1 | 07-02-2023 | 200 | | | + company1 | 07-03-2023 | 150 | | | + company1 | 07-04-2023 | 140 | | | + company1 | 07-05-2023 | 150 | 150 | 90 | 07-06-2023 + company1 | 07-06-2023 | 90 | | | + company1 | 07-07-2023 | 110 | 110 | 120 | 07-08-2023 + company1 | 07-08-2023 | 130 | | | + company1 | 07-09-2023 | 120 | | | + company1 | 07-10-2023 | 130 | | | + company2 | 07-01-2023 | 50 | 50 | 1400 | 07-02-2023 + company2 | 07-02-2023 | 2000 | | | + company2 | 07-03-2023 | 1500 | | | + company2 | 07-04-2023 | 1400 | | | + company2 | 07-05-2023 | 1500 | 1500 | 60 | 07-06-2023 + company2 | 07-06-2023 | 60 | | | + company2 | 07-07-2023 | 1100 | 1100 | 1200 | 07-08-2023 + company2 | 07-08-2023 | 1300 | | | + company2 | 07-09-2023 | 1200 | | | + company2 | 07-10-2023 | 1300 | | | +(20 rows) + +-- basic test using PREV. Use '?' +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, + nth_value(tdate, 2) OVER w AS nth_second + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP? 
DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value | nth_second +----------+------------+-------+-------------+------------+------------ + company1 | 07-01-2023 | 100 | 100 | 140 | 07-02-2023 + company1 | 07-02-2023 | 200 | | | + company1 | 07-03-2023 | 150 | | | + company1 | 07-04-2023 | 140 | | | + company1 | 07-05-2023 | 150 | 150 | 90 | 07-06-2023 + company1 | 07-06-2023 | 90 | | | + company1 | 07-07-2023 | 110 | 110 | 120 | 07-08-2023 + company1 | 07-08-2023 | 130 | | | + company1 | 07-09-2023 | 120 | | | + company1 | 07-10-2023 | 130 | | | + company2 | 07-01-2023 | 50 | 50 | 1400 | 07-02-2023 + company2 | 07-02-2023 | 2000 | | | + company2 | 07-03-2023 | 1500 | | | + company2 | 07-04-2023 | 1400 | | | + company2 | 07-05-2023 | 1500 | 1500 | 60 | 07-06-2023 + company2 | 07-06-2023 | 60 | | | + company2 | 07-07-2023 | 1100 | 1100 | 1200 | 07-08-2023 + company2 | 07-08-2023 | 1300 | | | + company2 | 07-09-2023 | 1200 | | | + company2 | 07-10-2023 | 1300 | | | +(20 rows) + +-- test using alternation (|) with sequence +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START (UP | DOWN)) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 200 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | 150 | 140 + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | 150 | 90 + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | 110 | 130 + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | 120 | 130 + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 2000 + company2 | 
07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | 1500 | 1400 + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | 1500 | 60 + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | 1100 | 1300 + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | 1200 | 1300 + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- test using alternation (|) with group quantifier +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START (UP | DOWN)+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 130 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | | + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 1300 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | | + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- test using nested alternation +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START ((UP DOWN) | FLAT)+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price), + FLAT AS price = PREV(price) +); + company | tdate | price | first_value | last_value 
+----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 150 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | 140 | 90 + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | 110 | 120 + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 1500 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | 1400 | 60 + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | 1100 | 1200 + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- test using group with quantifier +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((UP DOWN)+) + DEFINE + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | | + company1 | 07-02-2023 | 200 | 200 | 150 + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | 150 | 90 + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | | + company1 | 07-08-2023 | 130 | 130 | 120 + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | | + company2 | 07-02-2023 | 2000 | 2000 | 1500 + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | 1500 | 60 + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | | + company2 | 07-08-2023 | 1300 | 1300 | 1200 + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + 
+-- test using absolute threshold values (not relative PREV) +-- HIGH: price > 150, LOW: price < 100, MID: neutral range +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (LOW MID* HIGH) + DEFINE + LOW AS price < 100, + MID AS price >= 100 AND price <= 150, + HIGH AS price > 150 +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | | + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | | + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 2000 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | 60 | 1100 + company2 | 07-07-2023 | 1100 | | + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- test threshold-based pattern with alternation +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (LOW (MID | HIGH)+) + DEFINE + LOW AS price < 100, + MID AS price >= 100 AND price <= 150, + HIGH AS price > 150 +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | | + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | 90 | 130 + company1 | 07-07-2023 | 110 | | + 
company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 1500 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | 60 | 1300 + company2 | 07-07-2023 | 1100 | | + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- basic test with fixed-length pattern (A A A = exactly 3) +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A A A) + DEFINE + A AS price >= 140 AND price <= 150 +); + company | tdate | price | count +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | 0 + company1 | 07-02-2023 | 200 | 0 + company1 | 07-03-2023 | 150 | 3 + company1 | 07-04-2023 | 140 | 0 + company1 | 07-05-2023 | 150 | 0 + company1 | 07-06-2023 | 90 | 0 + company1 | 07-07-2023 | 110 | 0 + company1 | 07-08-2023 | 130 | 0 + company1 | 07-09-2023 | 120 | 0 + company1 | 07-10-2023 | 130 | 0 + company2 | 07-01-2023 | 50 | 0 + company2 | 07-02-2023 | 2000 | 0 + company2 | 07-03-2023 | 1500 | 0 + company2 | 07-04-2023 | 1400 | 0 + company2 | 07-05-2023 | 1500 | 0 + company2 | 07-06-2023 | 60 | 0 + company2 | 07-07-2023 | 1100 | 0 + company2 | 07-08-2023 | 1300 | 0 + company2 | 07-09-2023 | 1200 | 0 + company2 | 07-10-2023 | 1300 | 0 +(20 rows) + +-- test using {n} quantifier (A A A should be optimized to A{3}) +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{3}) + DEFINE + A AS price >= 140 AND price <= 150 +); + company | tdate | price | count +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | 0 + company1 | 07-02-2023 | 200 | 0 + company1 | 07-03-2023 | 
150 | 3 + company1 | 07-04-2023 | 140 | 0 + company1 | 07-05-2023 | 150 | 0 + company1 | 07-06-2023 | 90 | 0 + company1 | 07-07-2023 | 110 | 0 + company1 | 07-08-2023 | 130 | 0 + company1 | 07-09-2023 | 120 | 0 + company1 | 07-10-2023 | 130 | 0 + company2 | 07-01-2023 | 50 | 0 + company2 | 07-02-2023 | 2000 | 0 + company2 | 07-03-2023 | 1500 | 0 + company2 | 07-04-2023 | 1400 | 0 + company2 | 07-05-2023 | 1500 | 0 + company2 | 07-06-2023 | 60 | 0 + company2 | 07-07-2023 | 1100 | 0 + company2 | 07-08-2023 | 1300 | 0 + company2 | 07-09-2023 | 1200 | 0 + company2 | 07-10-2023 | 1300 | 0 +(20 rows) + +-- test using {n,} quantifier (2 or more) +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,}) + DEFINE + A AS price > 100 +); + company | tdate | price | count +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | 0 + company1 | 07-02-2023 | 200 | 4 + company1 | 07-03-2023 | 150 | 0 + company1 | 07-04-2023 | 140 | 0 + company1 | 07-05-2023 | 150 | 0 + company1 | 07-06-2023 | 90 | 0 + company1 | 07-07-2023 | 110 | 4 + company1 | 07-08-2023 | 130 | 0 + company1 | 07-09-2023 | 120 | 0 + company1 | 07-10-2023 | 130 | 0 + company2 | 07-01-2023 | 50 | 0 + company2 | 07-02-2023 | 2000 | 4 + company2 | 07-03-2023 | 1500 | 0 + company2 | 07-04-2023 | 1400 | 0 + company2 | 07-05-2023 | 1500 | 0 + company2 | 07-06-2023 | 60 | 0 + company2 | 07-07-2023 | 1100 | 4 + company2 | 07-08-2023 | 1300 | 0 + company2 | 07-09-2023 | 1200 | 0 + company2 | 07-10-2023 | 1300 | 0 +(20 rows) + +-- test using {n,m} quantifier (2 to 4) +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,4}) + DEFINE + A AS price > 100 +); + company | tdate | price | count +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | 0 + 
company1 | 07-02-2023 | 200 | 4 + company1 | 07-03-2023 | 150 | 0 + company1 | 07-04-2023 | 140 | 0 + company1 | 07-05-2023 | 150 | 0 + company1 | 07-06-2023 | 90 | 0 + company1 | 07-07-2023 | 110 | 4 + company1 | 07-08-2023 | 130 | 0 + company1 | 07-09-2023 | 120 | 0 + company1 | 07-10-2023 | 130 | 0 + company2 | 07-01-2023 | 50 | 0 + company2 | 07-02-2023 | 2000 | 4 + company2 | 07-03-2023 | 1500 | 0 + company2 | 07-04-2023 | 1400 | 0 + company2 | 07-05-2023 | 1500 | 0 + company2 | 07-06-2023 | 60 | 0 + company2 | 07-07-2023 | 1100 | 4 + company2 | 07-08-2023 | 1300 | 0 + company2 | 07-09-2023 | 1200 | 0 + company2 | 07-10-2023 | 1300 | 0 +(20 rows) + +-- test prefix/suffix merge optimization with bounded quantifier +-- Pattern A B (A B){1,2} A B should be optimized to (A B){3,4} +CREATE TEMP TABLE rpr_t (id int, val text); +INSERT INTO rpr_t VALUES + (1,'A'),(2,'B'), + (3,'A'),(4,'B'), + (5,'A'),(6,'B'), + (7,'A'),(8,'B'), + (9,'X'); +SELECT id, val, count(*) OVER w AS match_count +FROM rpr_t +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + INITIAL + PATTERN (A B (A B){1,2} A B) + DEFINE + A AS val = 'A', + B AS val = 'B' +); + id | val | match_count +----+-----+------------- + 1 | A | 8 + 2 | B | 0 + 3 | A | 6 + 4 | B | 0 + 5 | A | 0 + 6 | B | 0 + 7 | A | 0 + 8 | B | 0 + 9 | X | 0 +(9 rows) + +DROP TABLE rpr_t; +-- last_value() should remain consistent +SELECT company, tdate, price, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | last_value +----------+------------+-------+------------ + company1 | 07-01-2023 | 100 | 140 + company1 | 07-02-2023 | 200 | + company1 | 07-03-2023 | 150 | + company1 | 07-04-2023 | 140 | + company1 | 07-05-2023 | 150 | + 
company1 | 07-06-2023 | 90 | 120 + company1 | 07-07-2023 | 110 | + company1 | 07-08-2023 | 130 | + company1 | 07-09-2023 | 120 | + company1 | 07-10-2023 | 130 | + company2 | 07-01-2023 | 50 | 1400 + company2 | 07-02-2023 | 2000 | + company2 | 07-03-2023 | 1500 | + company2 | 07-04-2023 | 1400 | + company2 | 07-05-2023 | 1500 | + company2 | 07-06-2023 | 60 | 1200 + company2 | 07-07-2023 | 1100 | + company2 | 07-08-2023 | 1300 | + company2 | 07-09-2023 | 1200 | + company2 | 07-10-2023 | 1300 | +(20 rows) + +-- omit "START" in DEFINE but it is ok because "START AS TRUE" is +-- implicitly defined. per spec. +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, + nth_value(tdate, 2) OVER w AS nth_second + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value | nth_second +----------+------------+-------+-------------+------------+------------ + company1 | 07-01-2023 | 100 | 100 | 140 | 07-02-2023 + company1 | 07-02-2023 | 200 | | | + company1 | 07-03-2023 | 150 | | | + company1 | 07-04-2023 | 140 | | | + company1 | 07-05-2023 | 150 | | | + company1 | 07-06-2023 | 90 | 90 | 120 | 07-07-2023 + company1 | 07-07-2023 | 110 | | | + company1 | 07-08-2023 | 130 | | | + company1 | 07-09-2023 | 120 | | | + company1 | 07-10-2023 | 130 | | | + company2 | 07-01-2023 | 50 | 50 | 1400 | 07-02-2023 + company2 | 07-02-2023 | 2000 | | | + company2 | 07-03-2023 | 1500 | | | + company2 | 07-04-2023 | 1400 | | | + company2 | 07-05-2023 | 1500 | | | + company2 | 07-06-2023 | 60 | 60 | 1200 | 07-07-2023 + company2 | 07-07-2023 | 1100 | | | + company2 | 07-08-2023 | 1300 | | | + company2 | 07-09-2023 | 1200 | | | + company2 | 07-10-2023 | 1300 | | | +(20 rows) + +-- the first row start with less than or equal to 100 +SELECT company, tdate, price, 
first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (LOWPRICE UP+ DOWN+) + DEFINE + LOWPRICE AS price <= 100, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 140 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | 90 | 120 + company1 | 07-07-2023 | 110 | | + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 1400 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | 60 | 1200 + company2 | 07-07-2023 | 1100 | | + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- second row raises 120% +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (LOWPRICE UP+ DOWN+) + DEFINE + LOWPRICE AS price <= 100, + UP AS price > PREV(price) * 1.2, + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 140 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | | + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 1400 
+ company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | | + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- using NEXT +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UPDOWN) + DEFINE + START AS TRUE, + UPDOWN AS price > PREV(price) AND price > NEXT(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 200 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | 140 | 150 + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | 110 | 130 + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 2000 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | 1400 | 1500 + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | 1100 | 1300 + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- using AFTER MATCH SKIP TO NEXT ROW (same pattern as above; +-- match length is always 2, so result is identical to SKIP PAST LAST ROW. +-- SKIP TO NEXT ROW's distinct effect is tested in backtracking section.) 
+SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + INITIAL + PATTERN (START UPDOWN) + DEFINE + START AS TRUE, + UPDOWN AS price > PREV(price) AND price > NEXT(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 200 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | 140 | 150 + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | 110 | 130 + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 2000 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | 1400 | 1500 + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | 1100 | 1300 + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- PREV returns NULL at partition's first row (null_slot path) +SELECT company, tdate, price, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (BOUNDARY REST+) + DEFINE + BOUNDARY AS PREV(price) IS NULL, + REST AS PREV(price) IS NOT NULL +); + company | tdate | price | count +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | 10 + company1 | 07-02-2023 | 200 | 0 + company1 | 07-03-2023 | 150 | 0 + company1 | 07-04-2023 | 140 | 0 + company1 | 07-05-2023 | 150 | 0 + company1 | 07-06-2023 | 90 | 0 + company1 | 07-07-2023 | 110 | 0 + company1 | 07-08-2023 | 130 | 0 + company1 | 07-09-2023 | 120 | 0 + company1 | 07-10-2023 | 130 | 0 + company2 | 07-01-2023 | 50 | 10 + company2 
| 07-02-2023 | 2000 | 0 + company2 | 07-03-2023 | 1500 | 0 + company2 | 07-04-2023 | 1400 | 0 + company2 | 07-05-2023 | 1500 | 0 + company2 | 07-06-2023 | 60 | 0 + company2 | 07-07-2023 | 1100 | 0 + company2 | 07-08-2023 | 1300 | 0 + company2 | 07-09-2023 | 1200 | 0 + company2 | 07-10-2023 | 1300 | 0 +(20 rows) + +-- NEXT returns NULL at partition's last row (null_slot path) +SELECT company, tdate, price, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ BOUNDARY) + DEFINE + A AS NEXT(price) IS NOT NULL, + BOUNDARY AS NEXT(price) IS NULL +); + company | tdate | price | count +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | 10 + company1 | 07-02-2023 | 200 | 0 + company1 | 07-03-2023 | 150 | 0 + company1 | 07-04-2023 | 140 | 0 + company1 | 07-05-2023 | 150 | 0 + company1 | 07-06-2023 | 90 | 0 + company1 | 07-07-2023 | 110 | 0 + company1 | 07-08-2023 | 130 | 0 + company1 | 07-09-2023 | 120 | 0 + company1 | 07-10-2023 | 130 | 0 + company2 | 07-01-2023 | 50 | 10 + company2 | 07-02-2023 | 2000 | 0 + company2 | 07-03-2023 | 1500 | 0 + company2 | 07-04-2023 | 1400 | 0 + company2 | 07-05-2023 | 1500 | 0 + company2 | 07-06-2023 | 60 | 0 + company2 | 07-07-2023 | 1100 | 0 + company2 | 07-08-2023 | 1300 | 0 + company2 | 07-09-2023 | 1200 | 0 + company2 | 07-10-2023 | 1300 | 0 +(20 rows) + +-- DESC order: PREV refers to the row with later date +SELECT company, tdate, price, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate DESC + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (START DOWN+ UP+) + DEFINE + START AS TRUE, + DOWN AS price < PREV(price), + UP AS price > PREV(price) +); + company | tdate | price | count +----------+------------+-------+------- + company1 | 07-10-2023 | 130 | 3 + company1 | 07-09-2023 | 120 | 0 + company1 | 07-08-2023 | 
130 | 0 + company1 | 07-07-2023 | 110 | 3 + company1 | 07-06-2023 | 90 | 0 + company1 | 07-05-2023 | 150 | 0 + company1 | 07-04-2023 | 140 | 0 + company1 | 07-03-2023 | 150 | 0 + company1 | 07-02-2023 | 200 | 0 + company1 | 07-01-2023 | 100 | 0 + company2 | 07-10-2023 | 1300 | 3 + company2 | 07-09-2023 | 1200 | 0 + company2 | 07-08-2023 | 1300 | 0 + company2 | 07-07-2023 | 1100 | 3 + company2 | 07-06-2023 | 60 | 0 + company2 | 07-05-2023 | 1500 | 0 + company2 | 07-04-2023 | 1400 | 0 + company2 | 07-03-2023 | 1500 | 0 + company2 | 07-02-2023 | 2000 | 0 + company2 | 07-01-2023 | 50 | 0 +(20 rows) + +-- Multiple partitions with unequal sizes +WITH multi_part AS ( + SELECT * FROM (VALUES + ('a', 1, 10), ('a', 2, 20), ('a', 3, 15), + ('b', 1, 5), + ('c', 1, 100), ('c', 2, 200), ('c', 3, 150), ('c', 4, 140), ('c', 5, 300) + ) AS t(grp, id, val) +) +SELECT grp, id, val, count(*) OVER w +FROM multi_part +WINDOW w AS ( + PARTITION BY grp + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE + A AS val <= NEXT(val), + B AS val > PREV(val) OR val < PREV(val) +); + grp | id | val | count +-----+----+-----+------- + a | 1 | 10 | 3 + a | 2 | 20 | 0 + a | 3 | 15 | 0 + b | 1 | 5 | 0 + c | 1 | 100 | 5 + c | 2 | 200 | 0 + c | 3 | 150 | 0 + c | 4 | 140 | 0 + c | 5 | 300 | 0 +(9 rows) + +-- FLOAT/NUMERIC DEFINE conditions +WITH float_data AS ( + SELECT * FROM (VALUES + (1, 1.0::float8), (2, 1.5), (3, 1.4999), (4, 1.50001), (5, 0.1) + ) AS t(id, val) +) +SELECT id, val, count(*) OVER w +FROM float_data +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE + A AS TRUE, + B AS val > PREV(val) * 0.99 +); + id | val | count +----+---------+------- + 1 | 1 | 4 + 2 | 1.5 | 0 + 3 | 1.4999 | 0 + 4 | 1.50001 | 0 + 5 | 0.1 | 0 +(5 rows) + +-- +-- Error cases: PREV/NEXT usage restrictions +-- +-- PREV outside DEFINE clause +SELECT prev(price) 
FROM stock; +ERROR: cannot use prev outside a DEFINE clause +LINE 1: SELECT prev(price) FROM stock; + ^ +-- NEXT outside DEFINE clause +SELECT next(price) FROM stock; +ERROR: cannot use next outside a DEFINE clause +LINE 1: SELECT next(price) FROM stock; + ^ +-- Nested PREV +SELECT price FROM stock +WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A) + DEFINE A AS price > PREV(PREV(price)) +); +ERROR: PREV and NEXT cannot contain PREV or NEXT +LINE 7: DEFINE A AS price > PREV(PREV(price)) + ^ +HINT: Only PREV(FIRST()), PREV(LAST()), NEXT(FIRST()), and NEXT(LAST()) compound forms are allowed. +-- Nested NEXT +SELECT price FROM stock +WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A) + DEFINE A AS price > NEXT(NEXT(price)) +); +ERROR: PREV and NEXT cannot contain PREV or NEXT +LINE 7: DEFINE A AS price > NEXT(NEXT(price)) + ^ +HINT: Only PREV(FIRST()), PREV(LAST()), NEXT(FIRST()), and NEXT(LAST()) compound forms are allowed. +-- PREV nested inside NEXT +SELECT price FROM stock +WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A) + DEFINE A AS price > NEXT(PREV(price)) +); +ERROR: PREV and NEXT cannot contain PREV or NEXT +LINE 7: DEFINE A AS price > NEXT(PREV(price)) + ^ +HINT: Only PREV(FIRST()), PREV(LAST()), NEXT(FIRST()), and NEXT(LAST()) compound forms are allowed. +-- PREV nested inside expression inside NEXT +SELECT price FROM stock +WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A) + DEFINE A AS price > NEXT(price * PREV(price)) +); +ERROR: PREV and NEXT cannot contain PREV or NEXT +LINE 7: DEFINE A AS price > NEXT(price * PREV(price)) + ^ +HINT: Only PREV(FIRST()), PREV(LAST()), NEXT(FIRST()), and NEXT(LAST()) compound forms are allowed. 
+-- Triple nesting: error reported at outermost PREV +SELECT price FROM stock +WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A) + DEFINE A AS price > PREV(PREV(PREV(price))) +); +ERROR: PREV and NEXT cannot contain PREV or NEXT +LINE 7: DEFINE A AS price > PREV(PREV(PREV(price))) + ^ +HINT: Only PREV(FIRST()), PREV(LAST()), NEXT(FIRST()), and NEXT(LAST()) compound forms are allowed. +-- No column reference in PREV/NEXT argument +-- PREV(1): constant only, no column reference +SELECT price FROM stock +WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A) + DEFINE A AS PREV(1) > 0 +); +ERROR: argument of row pattern navigation operation must include at least one column reference +LINE 7: DEFINE A AS PREV(1) > 0 + ^ +-- NEXT(1 + 2): constant expression, no column reference +SELECT price FROM stock +WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A) + DEFINE A AS NEXT(1 + 2) > 0 +); +ERROR: argument of row pattern navigation operation must include at least one column reference +LINE 7: DEFINE A AS NEXT(1 + 2) > 0 + ^ +-- 2-arg form: PREV(1, 1): constant expression as first arg +SELECT price FROM stock +WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A) + DEFINE A AS PREV(1, 1) > 0 +); +ERROR: argument of row pattern navigation operation must include at least one column reference +LINE 7: DEFINE A AS PREV(1, 1) > 0 + ^ +-- Non-constant offset: column reference as offset +SELECT price FROM stock +WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A) + DEFINE A AS PREV(price, price) > 0 +); +ERROR: row pattern navigation offset must be a run-time constant +LINE 7: DEFINE A AS PREV(price, price) > 0 + ^ +-- Non-constant offset: volatile function as offset +SELECT price FROM 
stock +WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A) + DEFINE A AS PREV(price, random()::int) > 0 +); +ERROR: row pattern navigation offset must be a run-time constant +LINE 7: DEFINE A AS PREV(price, random()::int) > 0 + ^ +-- Non-constant offset: subquery as offset +SELECT price FROM stock +WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A) + DEFINE A AS PREV(price, (SELECT 1)) > 0 +); +ERROR: cannot use subquery in DEFINE expression +LINE 7: DEFINE A AS PREV(price, (SELECT 1)) > 0 + ^ +-- First arg: subquery (caught by DEFINE-level subquery restriction) +SELECT price FROM stock +WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A) + DEFINE A AS PREV(price + (SELECT 1)) > 0 +); +ERROR: cannot use subquery in DEFINE expression +LINE 7: DEFINE A AS PREV(price + (SELECT 1)) > 0 + ^ +-- First arg: volatile function is allowed (evaluated on target row) +SELECT company, tdate, price, + first_value(price) OVER w, last_value(price) OVER w, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS PREV(price + random() * 0) >= 0 +); + company | tdate | price | first_value | last_value | count +----------+------------+-------+-------------+------------+------- + company1 | 07-01-2023 | 100 | | | 0 + company1 | 07-02-2023 | 200 | 200 | 130 | 9 + company1 | 07-03-2023 | 150 | | | 0 + company1 | 07-04-2023 | 140 | | | 0 + company1 | 07-05-2023 | 150 | | | 0 + company1 | 07-06-2023 | 90 | | | 0 + company1 | 07-07-2023 | 110 | | | 0 + company1 | 07-08-2023 | 130 | | | 0 + company1 | 07-09-2023 | 120 | | | 0 + company1 | 07-10-2023 | 130 | | | 0 + company2 | 07-01-2023 | 50 | | | 0 + company2 | 07-02-2023 | 2000 | 2000 | 1300 | 9 + company2 | 07-03-2023 | 1500 | | | 0 + company2 | 07-04-2023 | 
1400 | | | 0 + company2 | 07-05-2023 | 1500 | | | 0 + company2 | 07-06-2023 | 60 | | | 0 + company2 | 07-07-2023 | 1100 | | | 0 + company2 | 07-08-2023 | 1300 | | | 0 + company2 | 07-09-2023 | 1200 | | | 0 + company2 | 07-10-2023 | 1300 | | | 0 +(20 rows) + +-- +-- 2-arg PREV/NEXT: functional tests +-- +-- PREV(price, 2): match rows where current price > price 2 rows back +-- stock: 100, 90, 80, 95, 110 +-- Pattern (A B+): A=any, B where price > PREV(price, 2) +-- At pos 2 (80): A matches. pos 3 (95): 95 > PREV(95,2)=90 TRUE. +-- pos 4 (110): 110 > PREV(110,2)=80 TRUE. Match! +SELECT company, tdate, price, + first_value(price) OVER w, last_value(price) OVER w, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE + A AS TRUE, + B AS price > PREV(price, 2) +); + company | tdate | price | first_value | last_value | count +----------+------------+-------+-------------+------------+------- + company1 | 07-01-2023 | 100 | | | 0 + company1 | 07-02-2023 | 200 | 200 | 150 | 2 + company1 | 07-03-2023 | 150 | | | 0 + company1 | 07-04-2023 | 140 | | | 0 + company1 | 07-05-2023 | 150 | | | 0 + company1 | 07-06-2023 | 90 | | | 0 + company1 | 07-07-2023 | 110 | 110 | 120 | 3 + company1 | 07-08-2023 | 130 | | | 0 + company1 | 07-09-2023 | 120 | | | 0 + company1 | 07-10-2023 | 130 | | | 0 + company2 | 07-01-2023 | 50 | | | 0 + company2 | 07-02-2023 | 2000 | 2000 | 1500 | 2 + company2 | 07-03-2023 | 1500 | | | 0 + company2 | 07-04-2023 | 1400 | | | 0 + company2 | 07-05-2023 | 1500 | | | 0 + company2 | 07-06-2023 | 60 | | | 0 + company2 | 07-07-2023 | 1100 | 1100 | 1200 | 3 + company2 | 07-08-2023 | 1300 | | | 0 + company2 | 07-09-2023 | 1200 | | | 0 + company2 | 07-10-2023 | 1300 | | | 0 +(20 rows) + +-- NEXT(price, 2): match rows where current price > price 2 rows ahead +-- pos 0 (100): NEXT(100,2)=80, 100>80 TRUE. pos 1 (90): NEXT(90,2)=95, 90>95 FALSE. Match ends. 
+SELECT company, tdate, price, + first_value(price) OVER w, last_value(price) OVER w, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS price > NEXT(price, 2) +); + company | tdate | price | first_value | last_value | count +----------+------------+-------+-------------+------------+------- + company1 | 07-01-2023 | 100 | | | 0 + company1 | 07-02-2023 | 200 | 200 | 200 | 1 + company1 | 07-03-2023 | 150 | | | 0 + company1 | 07-04-2023 | 140 | 140 | 150 | 2 + company1 | 07-05-2023 | 150 | | | 0 + company1 | 07-06-2023 | 90 | | | 0 + company1 | 07-07-2023 | 110 | | | 0 + company1 | 07-08-2023 | 130 | | | 0 + company1 | 07-09-2023 | 120 | | | 0 + company1 | 07-10-2023 | 130 | | | 0 + company2 | 07-01-2023 | 50 | | | 0 + company2 | 07-02-2023 | 2000 | 2000 | 2000 | 1 + company2 | 07-03-2023 | 1500 | | | 0 + company2 | 07-04-2023 | 1400 | 1400 | 1500 | 2 + company2 | 07-05-2023 | 1500 | | | 0 + company2 | 07-06-2023 | 60 | | | 0 + company2 | 07-07-2023 | 1100 | | | 0 + company2 | 07-08-2023 | 1300 | | | 0 + company2 | 07-09-2023 | 1200 | | | 0 + company2 | 07-10-2023 | 1300 | | | 0 +(20 rows) + +-- Expressions inside PREV/NEXT arg: expr is evaluated on target row +-- PREV(price - 50, 1): fetches (price - 50) from 1 row back +SELECT company, tdate, price, + first_value(price) OVER w, last_value(price) OVER w, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS price > PREV(price - 50, 1) +); + company | tdate | price | first_value | last_value | count +----------+------------+-------+-------------+------------+------- + company1 | 07-01-2023 | 100 | | | 0 + company1 | 07-02-2023 | 200 | 200 | 200 | 1 + company1 | 07-03-2023 | 150 | | | 0 + company1 | 07-04-2023 | 140 | 140 | 150 | 2 + company1 | 07-05-2023 | 150 | | | 0 + company1 | 07-06-2023 | 90 | | | 0 
+ company1 | 07-07-2023 | 110 | 110 | 130 | 4 + company1 | 07-08-2023 | 130 | | | 0 + company1 | 07-09-2023 | 120 | | | 0 + company1 | 07-10-2023 | 130 | | | 0 + company2 | 07-01-2023 | 50 | | | 0 + company2 | 07-02-2023 | 2000 | 2000 | 2000 | 1 + company2 | 07-03-2023 | 1500 | | | 0 + company2 | 07-04-2023 | 1400 | | | 0 + company2 | 07-05-2023 | 1500 | 1500 | 1500 | 1 + company2 | 07-06-2023 | 60 | | | 0 + company2 | 07-07-2023 | 1100 | 1100 | 1300 | 2 + company2 | 07-08-2023 | 1300 | | | 0 + company2 | 07-09-2023 | 1200 | | | 0 + company2 | 07-10-2023 | 1300 | 1300 | 1300 | 1 +(20 rows) + +-- NEXT(price * 2, 1): fetches (price * 2) from 1 row ahead +SELECT company, tdate, price, + first_value(price) OVER w, last_value(price) OVER w, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS price < NEXT(price * 2, 1) +); + company | tdate | price | first_value | last_value | count +----------+------------+-------+-------------+------------+------- + company1 | 07-01-2023 | 100 | 100 | 120 | 9 + company1 | 07-02-2023 | 200 | | | 0 + company1 | 07-03-2023 | 150 | | | 0 + company1 | 07-04-2023 | 140 | | | 0 + company1 | 07-05-2023 | 150 | | | 0 + company1 | 07-06-2023 | 90 | | | 0 + company1 | 07-07-2023 | 110 | | | 0 + company1 | 07-08-2023 | 130 | | | 0 + company1 | 07-09-2023 | 120 | | | 0 + company1 | 07-10-2023 | 130 | | | 0 + company2 | 07-01-2023 | 50 | 50 | 1400 | 4 + company2 | 07-02-2023 | 2000 | | | 0 + company2 | 07-03-2023 | 1500 | | | 0 + company2 | 07-04-2023 | 1400 | | | 0 + company2 | 07-05-2023 | 1500 | | | 0 + company2 | 07-06-2023 | 60 | 60 | 1200 | 4 + company2 | 07-07-2023 | 1100 | | | 0 + company2 | 07-08-2023 | 1300 | | | 0 + company2 | 07-09-2023 | 1200 | | | 0 + company2 | 07-10-2023 | 1300 | | | 0 +(20 rows) + +-- Large offset: PREV(val, 999) on 1000-row series matches only last row +-- NEXT(val, 999) matches only first row +SELECT val, 
first_value(val) OVER w, last_value(val) OVER w, count(*) OVER w +FROM generate_series(1, 1000) AS t(val) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS PREV(val, 999) = 1 +) +ORDER BY val DESC LIMIT 3; + val | first_value | last_value | count +------+-------------+------------+------- + 1000 | 1000 | 1000 | 1 + 999 | | | 0 + 998 | | | 0 +(3 rows) + +SELECT val, first_value(val) OVER w, last_value(val) OVER w, count(*) OVER w +FROM generate_series(1, 1000) AS t(val) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS NEXT(val, 999) = 1000 +) +LIMIT 3; + val | first_value | last_value | count +-----+-------------+------------+------- + 1 | 1 | 1 | 1 + 2 | | | 0 + 3 | | | 0 +(3 rows) + +-- PREV(price, 0): offset 0 means current row, always equal to price +-- A+ matches entire partition as one group; count = partition size +SELECT company, tdate, price, + first_value(price) OVER w, last_value(price) OVER w, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS PREV(price, 0) = price +); + company | tdate | price | first_value | last_value | count +----------+------------+-------+-------------+------------+------- + company1 | 07-01-2023 | 100 | 100 | 130 | 10 + company1 | 07-02-2023 | 200 | | | 0 + company1 | 07-03-2023 | 150 | | | 0 + company1 | 07-04-2023 | 140 | | | 0 + company1 | 07-05-2023 | 150 | | | 0 + company1 | 07-06-2023 | 90 | | | 0 + company1 | 07-07-2023 | 110 | | | 0 + company1 | 07-08-2023 | 130 | | | 0 + company1 | 07-09-2023 | 120 | | | 0 + company1 | 07-10-2023 | 130 | | | 0 + company2 | 07-01-2023 | 50 | 50 | 1300 | 10 + company2 | 07-02-2023 | 2000 | | | 0 + company2 | 07-03-2023 | 1500 | | | 0 + company2 | 07-04-2023 | 1400 | | | 0 + company2 | 07-05-2023 | 1500 | | | 0 + company2 | 07-06-2023 | 60 | | | 0 + company2 | 07-07-2023 | 1100 | | | 0 + 
company2 | 07-08-2023 | 1300 | | | 0 + company2 | 07-09-2023 | 1200 | | | 0 + company2 | 07-10-2023 | 1300 | | | 0 +(20 rows) + +-- 2-arg PREV/NEXT outside DEFINE clause +SELECT prev(price, 2) FROM stock; +ERROR: cannot use prev outside a DEFINE clause +LINE 1: SELECT prev(price, 2) FROM stock; + ^ +SELECT next(price, 2) FROM stock; +ERROR: cannot use next outside a DEFINE clause +LINE 1: SELECT next(price, 2) FROM stock; + ^ +-- 2-arg PREV/NEXT: negative offset +SELECT company, tdate, price, first_value(price) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS PREV(price, -1) IS NOT NULL +); +ERROR: row pattern navigation offset must not be negative +-- 2-arg PREV/NEXT: NULL offset (typed) +SELECT company, tdate, price, first_value(price) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS PREV(price, NULL::int8) IS NOT NULL +); +ERROR: row pattern navigation offset must not be null +-- 2-arg PREV/NEXT: NULL offset (untyped) +SELECT company, tdate, price, first_value(price) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS PREV(price, NULL) IS NOT NULL +); +ERROR: row pattern navigation offset must not be null +-- 2-arg PREV/NEXT: host variable negative and NULL +PREPARE test_prev_offset(int8) AS +SELECT company, tdate, price, first_value(price) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS price > PREV(price, $1) +); +EXECUTE test_prev_offset(-1); +ERROR: row pattern navigation offset must not be negative +EXECUTE test_prev_offset(NULL); +ERROR: row pattern navigation offset must not be null +DEALLOCATE test_prev_offset; +-- 2-arg PREV/NEXT: host variable with 
expression (0 + $1) +PREPARE test_prev_offset(int8) AS +SELECT company, tdate, price, first_value(price) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS price > PREV(price, 0 + $1) +); +EXECUTE test_prev_offset(-1); +ERROR: row pattern navigation offset must not be negative +EXECUTE test_prev_offset(NULL); +ERROR: row pattern navigation offset must not be null +DEALLOCATE test_prev_offset; +-- 2-arg PREV/NEXT: host variable with positive value +-- Exercises RPR_NAV_OFFSET_NEEDS_EVAL -> eval_nav_max_offset() path +PREPARE test_prev_offset(int8) AS +SELECT company, tdate, price, first_value(price) OVER w, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS price > PREV(price, $1) +); +EXECUTE test_prev_offset(1); + company | tdate | price | first_value | count +----------+------------+-------+-------------+------- + company1 | 07-01-2023 | 100 | 100 | 2 + company1 | 07-02-2023 | 200 | | 0 + company1 | 07-03-2023 | 150 | | 0 + company1 | 07-04-2023 | 140 | 140 | 2 + company1 | 07-05-2023 | 150 | | 0 + company1 | 07-06-2023 | 90 | 90 | 3 + company1 | 07-07-2023 | 110 | | 0 + company1 | 07-08-2023 | 130 | | 0 + company1 | 07-09-2023 | 120 | 120 | 2 + company1 | 07-10-2023 | 130 | | 0 + company2 | 07-01-2023 | 50 | 50 | 2 + company2 | 07-02-2023 | 2000 | | 0 + company2 | 07-03-2023 | 1500 | | 0 + company2 | 07-04-2023 | 1400 | 1400 | 2 + company2 | 07-05-2023 | 1500 | | 0 + company2 | 07-06-2023 | 60 | 60 | 3 + company2 | 07-07-2023 | 1100 | | 0 + company2 | 07-08-2023 | 1300 | | 0 + company2 | 07-09-2023 | 1200 | 1200 | 2 + company2 | 07-10-2023 | 1300 | | 0 +(20 rows) + +EXECUTE test_prev_offset(2); + company | tdate | price | first_value | count +----------+------------+-------+-------------+------- + company1 | 
07-01-2023 | 100 | | 0 + company1 | 07-02-2023 | 200 | 200 | 2 + company1 | 07-03-2023 | 150 | | 0 + company1 | 07-04-2023 | 140 | | 0 + company1 | 07-05-2023 | 150 | | 0 + company1 | 07-06-2023 | 90 | | 0 + company1 | 07-07-2023 | 110 | 110 | 3 + company1 | 07-08-2023 | 130 | | 0 + company1 | 07-09-2023 | 120 | | 0 + company1 | 07-10-2023 | 130 | | 0 + company2 | 07-01-2023 | 50 | | 0 + company2 | 07-02-2023 | 2000 | 2000 | 2 + company2 | 07-03-2023 | 1500 | | 0 + company2 | 07-04-2023 | 1400 | | 0 + company2 | 07-05-2023 | 1500 | | 0 + company2 | 07-06-2023 | 60 | | 0 + company2 | 07-07-2023 | 1100 | 1100 | 3 + company2 | 07-08-2023 | 1300 | | 0 + company2 | 07-09-2023 | 1200 | | 0 + company2 | 07-10-2023 | 1300 | | 0 +(20 rows) + +DEALLOCATE test_prev_offset; +-- 2-arg: two PREV with different offsets in same DEFINE clause +-- B: price exceeds both 1-back and 2-back values +SELECT company, tdate, price, + first_value(price) OVER w, last_value(price) OVER w, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE + A AS TRUE, + B AS price > PREV(price, 1) AND price > PREV(price, 2) +); + company | tdate | price | first_value | last_value | count +----------+------------+-------+-------------+------------+------- + company1 | 07-01-2023 | 100 | | | 0 + company1 | 07-02-2023 | 200 | | | 0 + company1 | 07-03-2023 | 150 | | | 0 + company1 | 07-04-2023 | 140 | | | 0 + company1 | 07-05-2023 | 150 | | | 0 + company1 | 07-06-2023 | 90 | | | 0 + company1 | 07-07-2023 | 110 | 110 | 130 | 2 + company1 | 07-08-2023 | 130 | | | 0 + company1 | 07-09-2023 | 120 | | | 0 + company1 | 07-10-2023 | 130 | | | 0 + company2 | 07-01-2023 | 50 | | | 0 + company2 | 07-02-2023 | 2000 | | | 0 + company2 | 07-03-2023 | 1500 | | | 0 + company2 | 07-04-2023 | 1400 | | | 0 + company2 | 07-05-2023 | 1500 | | | 0 + company2 | 07-06-2023 | 60 | | | 0 + company2 | 
07-07-2023 | 1100 | 1100 | 1300 | 2 + company2 | 07-08-2023 | 1300 | | | 0 + company2 | 07-09-2023 | 1200 | | | 0 + company2 | 07-10-2023 | 1300 | | | 0 +(20 rows) + +-- 2-arg: PREV and NEXT with explicit offsets in same DEFINE clause +-- A: price exceeds 1-back and is below 1-ahead (ascending interior point) +SELECT company, tdate, price, + first_value(price) OVER w, last_value(price) OVER w, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS price > PREV(price, 1) AND price < NEXT(price, 1) +); + company | tdate | price | first_value | last_value | count +----------+------------+-------+-------------+------------+------- + company1 | 07-01-2023 | 100 | | | 0 + company1 | 07-02-2023 | 200 | | | 0 + company1 | 07-03-2023 | 150 | | | 0 + company1 | 07-04-2023 | 140 | | | 0 + company1 | 07-05-2023 | 150 | | | 0 + company1 | 07-06-2023 | 90 | | | 0 + company1 | 07-07-2023 | 110 | 110 | 110 | 1 + company1 | 07-08-2023 | 130 | | | 0 + company1 | 07-09-2023 | 120 | | | 0 + company1 | 07-10-2023 | 130 | | | 0 + company2 | 07-01-2023 | 50 | | | 0 + company2 | 07-02-2023 | 2000 | | | 0 + company2 | 07-03-2023 | 1500 | | | 0 + company2 | 07-04-2023 | 1400 | | | 0 + company2 | 07-05-2023 | 1500 | | | 0 + company2 | 07-06-2023 | 60 | | | 0 + company2 | 07-07-2023 | 1100 | 1100 | 1100 | 1 + company2 | 07-08-2023 | 1300 | | | 0 + company2 | 07-09-2023 | 1200 | | | 0 + company2 | 07-10-2023 | 1300 | | | 0 +(20 rows) + +-- Pass-by-ref types: two PREV calls targeting different positions. +-- Verifies that datumCopy in RESTORE prevents dangling pointers when +-- nav_slot is re-fetched for the second navigation. +-- tdate::text gives distinct text values per row (e.g. '07-01-2023'). +-- B matches when 1-back date text > 2-back date text (always true for +-- ascending dates), so B+ extends the full partition after A. 
+SELECT company, tdate, tdate::text AS tdate_text, + first_value(tdate::text) OVER w, last_value(tdate::text) OVER w, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE + A AS TRUE, + B AS PREV(tdate::text, 1) > PREV(tdate::text, 2) +); + company | tdate | tdate_text | first_value | last_value | count +----------+------------+------------+-------------+------------+------- + company1 | 07-01-2023 | 07-01-2023 | | | 0 + company1 | 07-02-2023 | 07-02-2023 | 07-02-2023 | 07-10-2023 | 9 + company1 | 07-03-2023 | 07-03-2023 | | | 0 + company1 | 07-04-2023 | 07-04-2023 | | | 0 + company1 | 07-05-2023 | 07-05-2023 | | | 0 + company1 | 07-06-2023 | 07-06-2023 | | | 0 + company1 | 07-07-2023 | 07-07-2023 | | | 0 + company1 | 07-08-2023 | 07-08-2023 | | | 0 + company1 | 07-09-2023 | 07-09-2023 | | | 0 + company1 | 07-10-2023 | 07-10-2023 | | | 0 + company2 | 07-01-2023 | 07-01-2023 | | | 0 + company2 | 07-02-2023 | 07-02-2023 | 07-02-2023 | 07-10-2023 | 9 + company2 | 07-03-2023 | 07-03-2023 | | | 0 + company2 | 07-04-2023 | 07-04-2023 | | | 0 + company2 | 07-05-2023 | 07-05-2023 | | | 0 + company2 | 07-06-2023 | 07-06-2023 | | | 0 + company2 | 07-07-2023 | 07-07-2023 | | | 0 + company2 | 07-08-2023 | 07-08-2023 | | | 0 + company2 | 07-09-2023 | 07-09-2023 | | | 0 + company2 | 07-10-2023 | 07-10-2023 | | | 0 +(20 rows) + +-- numeric: PREV(price::numeric, 1) > PREV(price::numeric, 2) +-- B matches when price 1-back > price 2-back (ascending pair). 
+SELECT company, tdate, price::numeric AS nprice, + first_value(price::numeric) OVER w, last_value(price::numeric) OVER w, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE + A AS TRUE, + B AS PREV(price::numeric, 1) > PREV(price::numeric, 2) +); + company | tdate | nprice | first_value | last_value | count +----------+------------+--------+-------------+------------+------- + company1 | 07-01-2023 | 100 | | | 0 + company1 | 07-02-2023 | 200 | 200 | 150 | 2 + company1 | 07-03-2023 | 150 | | | 0 + company1 | 07-04-2023 | 140 | | | 0 + company1 | 07-05-2023 | 150 | 150 | 90 | 2 + company1 | 07-06-2023 | 90 | | | 0 + company1 | 07-07-2023 | 110 | 110 | 120 | 3 + company1 | 07-08-2023 | 130 | | | 0 + company1 | 07-09-2023 | 120 | | | 0 + company1 | 07-10-2023 | 130 | | | 0 + company2 | 07-01-2023 | 50 | | | 0 + company2 | 07-02-2023 | 2000 | 2000 | 1500 | 2 + company2 | 07-03-2023 | 1500 | | | 0 + company2 | 07-04-2023 | 1400 | | | 0 + company2 | 07-05-2023 | 1500 | 1500 | 60 | 2 + company2 | 07-06-2023 | 60 | | | 0 + company2 | 07-07-2023 | 1100 | 1100 | 1200 | 3 + company2 | 07-08-2023 | 1300 | | | 0 + company2 | 07-09-2023 | 1200 | | | 0 + company2 | 07-10-2023 | 1300 | | | 0 +(20 rows) + +-- +-- FIRST/LAST navigation +-- +-- Test data for FIRST/LAST: values cycle back so FIRST(val) = LAST(val) +-- at specific positions. 
+CREATE TEMP TABLE rpr_nav (id int, val int); +INSERT INTO rpr_nav VALUES (1,10),(2,20),(3,30),(4,10),(5,50),(6,10); +-- FIRST(val) = constant: B matches when match_start has val=10 +-- match_start=1(10): A=id1, B=id2, FIRST(val)=10 -> match {1,2} +-- match_start=3(30): A=id3, B=id4, FIRST(val)=30!=10 -> no match +-- match_start=4(10): A=id4, B=id5, FIRST(val)=10 -> match {4,5} +SELECT id, val, first_value(id) OVER w AS mf, last_value(id) OVER w AS ml +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS TRUE, B AS FIRST(val) = 10 +); + id | val | mf | ml +----+-----+----+---- + 1 | 10 | 1 | 2 + 2 | 20 | | + 3 | 30 | | + 4 | 10 | 4 | 5 + 5 | 50 | | + 6 | 10 | | +(6 rows) + +-- LAST(val): always equals current row's val (offset 0 default) +-- Equivalent to: B AS val > 15 +SELECT id, val, first_value(id) OVER w AS mf, last_value(id) OVER w AS ml +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS TRUE, B AS LAST(val) > 15 +); + id | val | mf | ml +----+-----+----+---- + 1 | 10 | 1 | 2 + 2 | 20 | | + 3 | 30 | | + 4 | 10 | 4 | 5 + 5 | 50 | | + 6 | 10 | | +(6 rows) + +-- Reluctant A+? with FIRST(val) = LAST(val): find shortest match where +-- first and last rows have the same val. +-- match_start=1(10): reluctant tries B early: +-- id2(20!=10), id3(30!=10), id4(10=10) -> match {1,2,3,4} +-- match_start=5(50): id6(10!=50) -> no match +SELECT id, val, first_value(id) OVER w AS mf, last_value(id) OVER w AS ml +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+? 
B) + DEFINE A AS TRUE, B AS FIRST(val) = LAST(val) +); + id | val | mf | ml +----+-----+----+---- + 1 | 10 | 1 | 4 + 2 | 20 | | + 3 | 30 | | + 4 | 10 | | + 5 | 50 | | + 6 | 10 | | +(6 rows) + +-- Greedy A+ with FIRST(val) = LAST(val): find longest match where +-- first and last rows have the same val. +-- match_start=1(10): greedy A eats all, B tries last: +-- id6(10=10) -> match {1,2,3,4,5,6} +SELECT id, val, first_value(id) OVER w AS mf, last_value(id) OVER w AS ml +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS TRUE, B AS FIRST(val) = LAST(val) +); + id | val | mf | ml +----+-----+----+---- + 1 | 10 | 1 | 6 + 2 | 20 | | + 3 | 30 | | + 4 | 10 | | + 5 | 50 | | + 6 | 10 | | +(6 rows) + +-- SKIP TO NEXT ROW with FIRST(val) = LAST(val): overlapping match attempts. +-- With ONE ROW PER MATCH, each row shows only its first match result. +SELECT id, val, first_value(id) OVER w AS mf, last_value(id) OVER w AS ml +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+? 
B) + DEFINE A AS TRUE, B AS FIRST(val) = LAST(val) +); + id | val | mf | ml +----+-----+----+---- + 1 | 10 | 1 | 4 + 2 | 20 | | + 3 | 30 | | + 4 | 10 | 4 | 6 + 5 | 50 | | + 6 | 10 | | +(6 rows) + +-- FIRST/LAST 2-arg offset form +-- +-- FIRST(val, 0) = FIRST(val): match_start row +SELECT id, val, first_value(id) OVER w AS mf, count(*) OVER w AS cnt +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS FIRST(val, 0) = 10 +); + id | val | mf | cnt +----+-----+----+----- + 1 | 10 | 1 | 6 + 2 | 20 | | 0 + 3 | 30 | | 0 + 4 | 10 | | 0 + 5 | 50 | | 0 + 6 | 10 | | 0 +(6 rows) + +-- FIRST(val, 1): match_start + 1 row (second row of match) +-- match_start=1(10): FIRST(val,1)=20, B needs val=20 -> id2(20) match, id3(30) no +-- match_start=3(30): FIRST(val,1)=10, B needs val=10 -> id4(10) match +SELECT id, val, first_value(id) OVER w AS mf, count(*) OVER w AS cnt +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS val = FIRST(val, 1) +); + id | val | mf | cnt +----+-----+----+----- + 1 | 10 | 1 | 2 + 2 | 20 | | 0 + 3 | 30 | 3 | 2 + 4 | 10 | | 0 + 5 | 50 | 5 | 2 + 6 | 10 | | 0 +(6 rows) + +-- FIRST(val, 99): offset beyond match range -> NULL, no match +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS FIRST(val, 99) IS NOT NULL +); + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 10 | 0 + 5 | 50 | 0 + 6 | 10 | 0 +(6 rows) + +-- LAST(val, 0) = LAST(val): current row +SELECT id, val, first_value(id) OVER w AS mf, count(*) OVER w AS cnt +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN 
(A B+) + DEFINE A AS TRUE, B AS LAST(val, 0) > 15 +); + id | val | mf | cnt +----+-----+----+----- + 1 | 10 | 1 | 3 + 2 | 20 | | 0 + 3 | 30 | | 0 + 4 | 10 | 4 | 2 + 5 | 50 | | 0 + 6 | 10 | | 0 +(6 rows) + +-- LAST(val, 1): one row back from current (previous match row) +-- At B evaluation on id2: LAST(val,1) = val at id1 = 10 +-- B matches when previous row val < 30 +SELECT id, val, first_value(id) OVER w AS mf, count(*) OVER w AS cnt +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS LAST(val, 1) < 30 +); + id | val | mf | cnt +----+-----+----+----- + 1 | 10 | 1 | 3 + 2 | 20 | | 0 + 3 | 30 | | 0 + 4 | 10 | 4 | 2 + 5 | 50 | | 0 + 6 | 10 | | 0 +(6 rows) + +-- LAST(val, 99): offset before match_start -> NULL +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS LAST(val, 99) IS NOT NULL +); + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 10 | 0 + 5 | 50 | 0 + 6 | 10 | 0 +(6 rows) + +-- Error: NULL offset +SELECT id, val, count(*) OVER w FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS FIRST(val, NULL::int8) IS NULL +); +ERROR: row pattern navigation offset must not be null +-- Error: negative offset +SELECT id, val, count(*) OVER w FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS LAST(val, -1) IS NULL +); +ERROR: row pattern navigation offset must not be negative +-- FIRST/LAST outside DEFINE clause (error cases) +SELECT first(val) FROM rpr_nav; +ERROR: cannot use first outside a DEFINE clause +LINE 1: SELECT first(val) FROM rpr_nav; + ^ +SELECT last(val) FROM rpr_nav; +ERROR: cannot use last outside a DEFINE clause +LINE 1: 
SELECT last(val) FROM rpr_nav; + ^ +SELECT first(val, 1) FROM rpr_nav; +ERROR: cannot use first outside a DEFINE clause +LINE 1: SELECT first(val, 1) FROM rpr_nav; + ^ +-- Functional notation: should access column, not RPR navigation +CREATE TEMP TABLE rpr_names (prev int, next int, first text, last text); +INSERT INTO rpr_names VALUES (1, 2, 'Joe', 'Blow'); +SELECT prev(f), next(f), first(f), last(f) FROM rpr_names f; + prev | next | first | last +------+------+-------+------ + 1 | 2 | Joe | Blow +(1 row) + +DROP TABLE rpr_names; +-- Compound navigation: PREV(FIRST(val), M) +-- rpr_nav: (1,10),(2,20),(3,30),(4,10),(5,50),(6,10) +-- PREV(FIRST(val), 1): target = match_start + 0 - 1 = match_start - 1 +-- At match_start=1: target=0 -> out of range -> NULL +-- At match_start=3: target=2(val=20) -> 20 > 0 -> true +SELECT id, val, first_value(id) OVER w AS mf, count(*) OVER w AS cnt +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS PREV(FIRST(val), 1) > 0 +); + id | val | mf | cnt +----+-----+----+----- + 1 | 10 | | 0 + 2 | 20 | 2 | 5 + 3 | 30 | | 0 + 4 | 10 | | 0 + 5 | 50 | | 0 + 6 | 10 | | 0 +(6 rows) + +-- NEXT(FIRST(val, 1), 1): target = match_start + 1 + 1 = match_start + 2 +-- At match_start=1, B on id2: target=1+1+1=3(val=30), 30>0 -> true +SELECT id, val, first_value(id) OVER w AS mf, count(*) OVER w AS cnt +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS NEXT(FIRST(val, 1), 1) > 0 +); + id | val | mf | cnt +----+-----+----+----- + 1 | 10 | 1 | 6 + 2 | 20 | | 0 + 3 | 30 | | 0 + 4 | 10 | | 0 + 5 | 50 | | 0 + 6 | 10 | | 0 +(6 rows) + +-- PREV(LAST(val), 2): target = currentpos - 0 - 2 = currentpos - 2 +-- Same backward reach as PREV(val, 2) +SELECT id, val, first_value(id) OVER w AS mf, count(*) OVER w AS cnt +FROM rpr_nav WINDOW w 
AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS PREV(LAST(val), 2) IS NOT NULL +); + id | val | mf | cnt +----+-----+----+----- + 1 | 10 | | 0 + 2 | 20 | 2 | 5 + 3 | 30 | | 0 + 4 | 10 | | 0 + 5 | 50 | | 0 + 6 | 10 | | 0 +(6 rows) + +-- NEXT(LAST(val, 1), 2): target = currentpos - 1 + 2 = currentpos + 1 +-- Looks 1 row ahead: same as NEXT(val, 1) +SELECT id, val, first_value(id) OVER w AS mf, count(*) OVER w AS cnt +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS NEXT(LAST(val, 1), 2) IS NOT NULL +); + id | val | mf | cnt +----+-----+----+----- + 1 | 10 | 1 | 5 + 2 | 20 | | 0 + 3 | 30 | | 0 + 4 | 10 | | 0 + 5 | 50 | | 0 + 6 | 10 | | 0 +(6 rows) + +-- Compound: outer offset beyond partition (PREV far back) +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE A AS TRUE, B AS PREV(FIRST(val), 99) IS NOT NULL +); + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 10 | 0 + 5 | 50 | 0 + 6 | 10 | 0 +(6 rows) + +-- Compound: outer offset beyond partition (NEXT far forward) +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE A AS TRUE, B AS NEXT(FIRST(val), 99) IS NOT NULL +); + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 10 | 0 + 5 | 50 | 0 + 6 | 10 | 0 +(6 rows) + +-- Compound: inner offset beyond match range (FIRST offset too large) +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE A AS TRUE, B AS PREV(FIRST(val, 99), 1) IS NOT NULL +); + id | val | cnt +----+-----+----- + 1 | 10 | 
0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 10 | 0 + 5 | 50 | 0 + 6 | 10 | 0 +(6 rows) + +-- Compound: inner offset beyond match range (LAST offset too large) +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE A AS TRUE, B AS NEXT(LAST(val, 99), 1) IS NOT NULL +); + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 10 | 0 + 5 | 50 | 0 + 6 | 10 | 0 +(6 rows) + +-- Compound: NULL outer offset (runtime error) +SELECT id, val, count(*) OVER w FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS PREV(FIRST(val), NULL::int8) IS NULL +); +ERROR: row pattern navigation offset must not be null +-- Compound: negative outer offset (runtime error) +SELECT id, val, count(*) OVER w FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS NEXT(LAST(val), -1) IS NULL +); +ERROR: row pattern navigation offset must not be negative +-- Compound: default offsets on both sides +-- PREV(FIRST(val)): inner=0 (match_start), outer=1 -> target = match_start - 1 +SELECT id, val, first_value(id) OVER w AS mf, count(*) OVER w AS cnt +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS PREV(FIRST(val)) IS NOT NULL +); + id | val | mf | cnt +----+-----+----+----- + 1 | 10 | | 0 + 2 | 20 | 2 | 5 + 3 | 30 | | 0 + 4 | 10 | | 0 + 5 | 50 | | 0 + 6 | 10 | | 0 +(6 rows) + +-- NEXT(LAST(val)): inner=0 (currentpos), outer=1 -> target = currentpos + 1 +SELECT id, val, first_value(id) OVER w AS mf, count(*) OVER w AS cnt +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS NEXT(LAST(val)) IS NOT NULL +); + id | val | mf | cnt 
+----+-----+----+----- + 1 | 10 | 1 | 5 + 2 | 20 | | 0 + 3 | 30 | | 0 + 4 | 10 | | 0 + 5 | 50 | | 0 + 6 | 10 | | 0 +(6 rows) + +-- Compound: inner NULL offset (runtime error) +SELECT id, val, count(*) OVER w FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS PREV(FIRST(val, NULL::int8), 1) IS NULL +); +ERROR: row pattern navigation offset must not be null +-- Compound: inner negative offset (runtime error) +SELECT id, val, count(*) OVER w FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS NEXT(LAST(val, -1), 1) IS NULL +); +ERROR: row pattern navigation offset must not be negative +-- Compound + host variable offsets +PREPARE test_compound_offset(int8, int8) AS +SELECT id, val, first_value(id) OVER w AS mf, count(*) OVER w AS cnt +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS PREV(FIRST(val, $1), $2) IS NOT NULL +); +EXECUTE test_compound_offset(0, 1); + id | val | mf | cnt +----+-----+----+----- + 1 | 10 | | 0 + 2 | 20 | 2 | 5 + 3 | 30 | | 0 + 4 | 10 | | 0 + 5 | 50 | | 0 + 6 | 10 | | 0 +(6 rows) + +EXECUTE test_compound_offset(1, 1); + id | val | mf | cnt +----+-----+----+----- + 1 | 10 | 1 | 6 + 2 | 20 | | 0 + 3 | 30 | | 0 + 4 | 10 | | 0 + 5 | 50 | | 0 + 6 | 10 | | 0 +(6 rows) + +DEALLOCATE test_compound_offset; +-- Compound + SKIP TO NEXT ROW: overlapping matches with PREV(FIRST()) +SELECT id, val, first_value(id) OVER w AS mf, count(*) OVER w AS cnt +FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS PREV(FIRST(val), 1) > 0 +); + id | val | mf | cnt +----+-----+----+----- + 1 | 10 | | 0 + 2 | 20 | 2 | 5 + 3 | 30 | 3 | 4 + 4 | 10 | 4 | 3 + 5 | 50 | 5 | 2 + 6 | 10 | | 0 +(6 rows) + +-- Compound 
+ multiple partitions +CREATE TEMP TABLE rpr_nav_part (gid int, id int, val int); +INSERT INTO rpr_nav_part VALUES + (1,1,10),(1,2,20),(1,3,30), + (2,1,40),(2,2,50),(2,3,60); +SELECT gid, id, val, first_value(id) OVER w AS mf, count(*) OVER w AS cnt +FROM rpr_nav_part WINDOW w AS ( + PARTITION BY gid ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS NEXT(FIRST(val), 1) > 0 +); + gid | id | val | mf | cnt +-----+----+-----+----+----- + 1 | 1 | 10 | 1 | 3 + 1 | 2 | 20 | | 0 + 1 | 3 | 30 | | 0 + 2 | 1 | 40 | 1 | 3 + 2 | 2 | 50 | | 0 + 2 | 3 | 60 | | 0 +(6 rows) + +DROP TABLE rpr_nav_part; +-- Reverse nesting: FIRST wrapping PREV is prohibited +SELECT id, val FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B) + DEFINE A AS TRUE, B AS FIRST(PREV(val)) > 0 +); +ERROR: FIRST and LAST cannot contain PREV or NEXT +LINE 5: DEFINE A AS TRUE, B AS FIRST(PREV(val)) > 0 + ^ +HINT: Only PREV(FIRST()), PREV(LAST()), NEXT(FIRST()), and NEXT(LAST()) compound forms are allowed. +-- Reverse nesting: LAST wrapping NEXT is prohibited +SELECT id, val FROM rpr_nav WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B) + DEFINE A AS TRUE, B AS LAST(NEXT(val)) > 0 +); +ERROR: FIRST and LAST cannot contain PREV or NEXT +LINE 5: DEFINE A AS TRUE, B AS LAST(NEXT(val)) > 0 + ^ +HINT: Only PREV(FIRST()), PREV(LAST()), NEXT(FIRST()), and NEXT(LAST()) compound forms are allowed. 
+DROP TABLE rpr_nav; +-- +-- SKIP TO / Backtracking / Frame boundary +-- +-- match everything +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (A+) + DEFINE + A AS TRUE +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 130 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | | + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 1300 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | | + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- nth_value beyond reduced frame (no IGNORE NULLS) +-- Tests WinGetSlotInFrame/WinGetFuncArgInFrame out-of-frame with RPR +SELECT company, tdate, price, + nth_value(price, 5) OVER w AS nth_5 +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | nth_5 +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | + company1 | 07-02-2023 | 200 | + company1 | 07-03-2023 | 150 | + company1 | 07-04-2023 | 140 | + company1 | 07-05-2023 | 150 | + company1 | 07-06-2023 | 90 | + company1 | 07-07-2023 | 110 | + company1 | 07-08-2023 | 130 | + 
company1 | 07-09-2023 | 120 | + company1 | 07-10-2023 | 130 | + company2 | 07-01-2023 | 50 | + company2 | 07-02-2023 | 2000 | + company2 | 07-03-2023 | 1500 | + company2 | 07-04-2023 | 1400 | + company2 | 07-05-2023 | 1500 | + company2 | 07-06-2023 | 60 | + company2 | 07-07-2023 | 1100 | + company2 | 07-08-2023 | 1300 | + company2 | 07-09-2023 | 1200 | + company2 | 07-10-2023 | 1300 | +(20 rows) + +-- backtracking with reclassification of rows +-- using AFTER MATCH SKIP PAST LAST ROW +SELECT company, tdate, price, first_value(tdate) OVER w, last_value(tdate) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (A+ B+) + DEFINE + A AS price > 100, + B AS price > 100 +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | | + company1 | 07-02-2023 | 200 | 07-02-2023 | 07-05-2023 + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | 07-07-2023 | 07-10-2023 + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | | + company2 | 07-02-2023 | 2000 | 07-02-2023 | 07-05-2023 + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | 07-07-2023 | 07-10-2023 + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- backtracking with reclassification of rows +-- using AFTER MATCH SKIP TO NEXT ROW +SELECT company, tdate, price, first_value(tdate) OVER w, last_value(tdate) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + 
AFTER MATCH SKIP TO NEXT ROW + INITIAL + PATTERN (A+ B+) + DEFINE + A AS price > 100, + B AS price > 100 +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | | + company1 | 07-02-2023 | 200 | 07-02-2023 | 07-05-2023 + company1 | 07-03-2023 | 150 | 07-03-2023 | 07-05-2023 + company1 | 07-04-2023 | 140 | 07-04-2023 | 07-05-2023 + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | 07-07-2023 | 07-10-2023 + company1 | 07-08-2023 | 130 | 07-08-2023 | 07-10-2023 + company1 | 07-09-2023 | 120 | 07-09-2023 | 07-10-2023 + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | | + company2 | 07-02-2023 | 2000 | 07-02-2023 | 07-05-2023 + company2 | 07-03-2023 | 1500 | 07-03-2023 | 07-05-2023 + company2 | 07-04-2023 | 1400 | 07-04-2023 | 07-05-2023 + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | 07-07-2023 | 07-10-2023 + company2 | 07-08-2023 | 1300 | 07-08-2023 | 07-10-2023 + company2 | 07-09-2023 | 1200 | 07-09-2023 | 07-10-2023 + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- SKIP TO NEXT ROW with limited frame (Ishii-san's test case) +-- Each row should produce its own match within its frame +WITH data AS ( + SELECT * FROM (VALUES + ('A', 1), ('A', 2), + ('B', 3), ('B', 4) + ) AS t(gid, id) +) +SELECT gid, id, array_agg(id) OVER w +FROM data +WINDOW w AS ( + PARTITION BY gid + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE A AS id < 10 +); + gid | id | array_agg +-----+----+----------- + A | 1 | {1,2} + A | 2 | {2} + B | 3 | {3,4} + B | 4 | {4} +(4 rows) + +-- Limited frame with absorption test +-- Row 0: frame [0,2], can't see B at row 3 -> no match +-- Row 1: frame [1,3], can see A A B -> should match rows 1-3 +WITH frame_absorb_test AS ( + SELECT * FROM (VALUES + (0, 'A'), (1, 'A'), (2, 'A'), (3, 'B') + ) AS t(id, flag) 
+) +SELECT id, flag, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM frame_absorb_test +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS flag = 'A', + B AS flag = 'B' +); + id | flag | match_start | match_end +----+------+-------------+----------- + 0 | A | | + 1 | A | 1 | 3 + 2 | A | | + 3 | B | | +(4 rows) + +-- ROWS BETWEEN CURRENT ROW AND offset FOLLOWING +SELECT company, tdate, price, first_value(tdate) OVER w, last_value(tdate) OVER w, + count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value | count +----------+------------+-------+-------------+------------+------- + company1 | 07-01-2023 | 100 | 07-01-2023 | 07-03-2023 | 3 + company1 | 07-02-2023 | 200 | | | 0 + company1 | 07-03-2023 | 150 | | | 0 + company1 | 07-04-2023 | 140 | 07-04-2023 | 07-06-2023 | 3 + company1 | 07-05-2023 | 150 | | | 0 + company1 | 07-06-2023 | 90 | | | 0 + company1 | 07-07-2023 | 110 | 07-07-2023 | 07-09-2023 | 3 + company1 | 07-08-2023 | 130 | | | 0 + company1 | 07-09-2023 | 120 | | | 0 + company1 | 07-10-2023 | 130 | | | 0 + company2 | 07-01-2023 | 50 | 07-01-2023 | 07-03-2023 | 3 + company2 | 07-02-2023 | 2000 | | | 0 + company2 | 07-03-2023 | 1500 | | | 0 + company2 | 07-04-2023 | 1400 | 07-04-2023 | 07-06-2023 | 3 + company2 | 07-05-2023 | 1500 | | | 0 + company2 | 07-06-2023 | 60 | | | 0 + company2 | 07-07-2023 | 1100 | 07-07-2023 | 07-09-2023 | 3 + company2 | 07-08-2023 | 1300 | | | 0 + company2 | 07-09-2023 | 1200 | | | 0 + company2 | 07-10-2023 | 1300 | | | 0 +(20 rows) + +-- +-- Aggregates +-- +-- using AFTER MATCH SKIP PAST LAST ROW +SELECT company, tdate, price, + 
first_value(price) OVER w, + last_value(price) OVER w, + max(price) OVER w, + min(price) OVER w, + sum(price) OVER w, + avg(price) OVER w, + count(price) OVER w +FROM stock +WINDOW w AS ( +PARTITION BY company +ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING +AFTER MATCH SKIP PAST LAST ROW +INITIAL +PATTERN (START UP+ DOWN+) +DEFINE +START AS TRUE, +UP AS price > PREV(price), +DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value | max | min | sum | avg | count +----------+------------+-------+-------------+------------+------+-----+------+-----------------------+------- + company1 | 07-01-2023 | 100 | 100 | 140 | 200 | 100 | 590 | 147.5000000000000000 | 4 + company1 | 07-02-2023 | 200 | | | | | | | 0 + company1 | 07-03-2023 | 150 | | | | | | | 0 + company1 | 07-04-2023 | 140 | | | | | | | 0 + company1 | 07-05-2023 | 150 | | | | | | | 0 + company1 | 07-06-2023 | 90 | 90 | 120 | 130 | 90 | 450 | 112.5000000000000000 | 4 + company1 | 07-07-2023 | 110 | | | | | | | 0 + company1 | 07-08-2023 | 130 | | | | | | | 0 + company1 | 07-09-2023 | 120 | | | | | | | 0 + company1 | 07-10-2023 | 130 | | | | | | | 0 + company2 | 07-01-2023 | 50 | 50 | 1400 | 2000 | 50 | 4950 | 1237.5000000000000000 | 4 + company2 | 07-02-2023 | 2000 | | | | | | | 0 + company2 | 07-03-2023 | 1500 | | | | | | | 0 + company2 | 07-04-2023 | 1400 | | | | | | | 0 + company2 | 07-05-2023 | 1500 | | | | | | | 0 + company2 | 07-06-2023 | 60 | 60 | 1200 | 1300 | 60 | 3660 | 915.0000000000000000 | 4 + company2 | 07-07-2023 | 1100 | | | | | | | 0 + company2 | 07-08-2023 | 1300 | | | | | | | 0 + company2 | 07-09-2023 | 1200 | | | | | | | 0 + company2 | 07-10-2023 | 1300 | | | | | | | 0 +(20 rows) + +-- using AFTER MATCH SKIP TO NEXT ROW +SELECT company, tdate, price, + first_value(price) OVER w, + last_value(price) OVER w, + max(price) OVER w, + min(price) OVER w, + sum(price) OVER w, + avg(price) OVER w, + count(price) OVER w +FROM stock +WINDOW w AS ( +PARTITION BY company 
+ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING +AFTER MATCH SKIP TO NEXT ROW +INITIAL +PATTERN (START UP+ DOWN+) +DEFINE +START AS TRUE, +UP AS price > PREV(price), +DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value | max | min | sum | avg | count +----------+------------+-------+-------------+------------+------+------+------+-----------------------+------- + company1 | 07-01-2023 | 100 | 100 | 140 | 200 | 100 | 590 | 147.5000000000000000 | 4 + company1 | 07-02-2023 | 200 | | | | | | | 0 + company1 | 07-03-2023 | 150 | | | | | | | 0 + company1 | 07-04-2023 | 140 | 140 | 90 | 150 | 90 | 380 | 126.6666666666666667 | 3 + company1 | 07-05-2023 | 150 | | | | | | | 0 + company1 | 07-06-2023 | 90 | 90 | 120 | 130 | 90 | 450 | 112.5000000000000000 | 4 + company1 | 07-07-2023 | 110 | 110 | 120 | 130 | 110 | 360 | 120.0000000000000000 | 3 + company1 | 07-08-2023 | 130 | | | | | | | 0 + company1 | 07-09-2023 | 120 | | | | | | | 0 + company1 | 07-10-2023 | 130 | | | | | | | 0 + company2 | 07-01-2023 | 50 | 50 | 1400 | 2000 | 50 | 4950 | 1237.5000000000000000 | 4 + company2 | 07-02-2023 | 2000 | | | | | | | 0 + company2 | 07-03-2023 | 1500 | | | | | | | 0 + company2 | 07-04-2023 | 1400 | 1400 | 60 | 1500 | 60 | 2960 | 986.6666666666666667 | 3 + company2 | 07-05-2023 | 1500 | | | | | | | 0 + company2 | 07-06-2023 | 60 | 60 | 1200 | 1300 | 60 | 3660 | 915.0000000000000000 | 4 + company2 | 07-07-2023 | 1100 | 1100 | 1200 | 1300 | 1100 | 3600 | 1200.0000000000000000 | 3 + company2 | 07-08-2023 | 1300 | | | | | | | 0 + company2 | 07-09-2023 | 1200 | | | | | | | 0 + company2 | 07-10-2023 | 1300 | | | | | | | 0 +(20 rows) + +-- row_number() within RPR reduced frame +SELECT company, tdate, price, row_number() OVER w, count(*) OVER w +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price 
> PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | row_number | count +----------+------------+-------+------------+------- + company1 | 07-01-2023 | 100 | 1 | 4 + company1 | 07-02-2023 | 200 | 2 | 0 + company1 | 07-03-2023 | 150 | 3 | 0 + company1 | 07-04-2023 | 140 | 4 | 0 + company1 | 07-05-2023 | 150 | 5 | 0 + company1 | 07-06-2023 | 90 | 6 | 4 + company1 | 07-07-2023 | 110 | 7 | 0 + company1 | 07-08-2023 | 130 | 8 | 0 + company1 | 07-09-2023 | 120 | 9 | 0 + company1 | 07-10-2023 | 130 | 10 | 0 + company2 | 07-01-2023 | 50 | 1 | 4 + company2 | 07-02-2023 | 2000 | 2 | 0 + company2 | 07-03-2023 | 1500 | 3 | 0 + company2 | 07-04-2023 | 1400 | 4 | 0 + company2 | 07-05-2023 | 1500 | 5 | 0 + company2 | 07-06-2023 | 60 | 6 | 4 + company2 | 07-07-2023 | 1100 | 7 | 0 + company2 | 07-08-2023 | 1300 | 8 | 0 + company2 | 07-09-2023 | 1200 | 9 | 0 + company2 | 07-10-2023 | 1300 | 10 | 0 +(20 rows) + +-- +-- SQL Integration: JOIN, CTE, LATERAL +-- +-- JOIN case +CREATE TEMP TABLE t1 (i int, v1 int); +CREATE TEMP TABLE t2 (j int, v2 int); +INSERT INTO t1 VALUES(1,10); +INSERT INTO t1 VALUES(1,11); +INSERT INTO t1 VALUES(1,12); +INSERT INTO t2 VALUES(2,10); +INSERT INTO t2 VALUES(2,11); +INSERT INTO t2 VALUES(2,12); +SELECT * FROM t1, t2 WHERE t1.v1 <= 11 AND t2.v2 <= 11; + i | v1 | j | v2 +---+----+---+---- + 1 | 10 | 2 | 10 + 1 | 10 | 2 | 11 + 1 | 11 | 2 | 10 + 1 | 11 | 2 | 11 +(4 rows) + +SELECT *, count(*) OVER w FROM t1, t2 +WINDOW w AS ( + PARTITION BY t1.i + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A) + DEFINE + A AS v1 <= 11 AND v2 <= 11 +); + i | v1 | j | v2 | count +---+----+---+----+------- + 1 | 10 | 2 | 10 | 1 + 1 | 10 | 2 | 11 | 1 + 1 | 10 | 2 | 12 | 0 + 1 | 11 | 2 | 10 | 1 + 1 | 11 | 2 | 11 | 1 + 1 | 11 | 2 | 12 | 0 + 1 | 12 | 2 | 10 | 0 + 1 | 12 | 2 | 11 | 0 + 1 | 12 | 2 | 12 | 0 +(9 rows) + +-- WITH case +WITH wstock AS ( + SELECT * FROM stock WHERE tdate < '2023-07-08' +) +SELECT tdate, price, 
+first_value(tdate) OVER w, +count(*) OVER w + FROM wstock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + tdate | price | first_value | count +------------+-------+-------------+------- + 07-01-2023 | 100 | 07-01-2023 | 4 + 07-02-2023 | 200 | | 0 + 07-03-2023 | 150 | | 0 + 07-04-2023 | 140 | | 0 + 07-05-2023 | 150 | | 0 + 07-06-2023 | 90 | | 0 + 07-07-2023 | 110 | | 0 + 07-01-2023 | 50 | 07-01-2023 | 4 + 07-02-2023 | 2000 | | 0 + 07-03-2023 | 1500 | | 0 + 07-04-2023 | 1400 | | 0 + 07-05-2023 | 1500 | | 0 + 07-06-2023 | 60 | | 0 + 07-07-2023 | 1100 | | 0 +(14 rows) + +-- ReScan test: LATERAL join forces WindowAgg rescan with RPR +-- Tests ExecReScanWindowAgg clearing nav_slot +SELECT g.x, sub.* +FROM generate_series(1, 2) g(x), +LATERAL ( + SELECT id, price, count(*) OVER w AS c + FROM (VALUES (1, 100), (2, 200), (3, 150)) AS t(id, price) + WHERE id <= g.x + 1 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (START UP+) + DEFINE + START AS TRUE, + UP AS price > PREV(price) + ) +) sub +ORDER BY g.x, sub.id; + x | id | price | c +---+----+-------+--- + 1 | 1 | 100 | 2 + 1 | 2 | 200 | 0 + 2 | 1 | 100 | 2 + 2 | 2 | 200 | 0 + 2 | 3 | 150 | 0 +(5 rows) + +-- PREV has multiple column reference +CREATE TEMP TABLE rpr1 (id INTEGER, i SERIAL, j INTEGER); +INSERT INTO rpr1(id, j) SELECT 1, g*2 FROM generate_series(1, 10) AS g; +SELECT id, i, j, count(*) OVER w + FROM rpr1 + WINDOW w AS ( + PARTITION BY id + ORDER BY i + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (START COND+) + DEFINE + START AS TRUE, + COND AS PREV(i + j + 1) < 10 +); + id | i | j | count +----+----+----+------- + 1 | 1 | 2 | 3 + 1 | 2 | 4 | 0 + 1 | 3 | 6 | 0 + 1 | 4 | 8 | 0 + 1 | 5 | 10 | 0 + 1 | 6 | 12 | 0 + 1 | 7 | 14 | 0 
+ 1 | 8 | 16 | 0 + 1 | 9 | 18 | 0 + 1 | 10 | 20 | 0 +(10 rows) + +-- +-- Large-scale / scalability tests +-- +-- Smoke test for larger partitions. +WITH s AS ( + SELECT v, count(*) OVER w AS c + FROM (SELECT generate_series(1, 5000) v) + WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ( r+ ) + DEFINE r AS TRUE + ) +) +-- Should be exactly one long match across all rows. +SELECT * FROM s WHERE c > 0; + v | c +---+------ + 1 | 5000 +(1 row) + +WITH s AS ( + SELECT v, count(*) OVER w AS c + FROM (SELECT generate_series(1, 5000) v) + WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ( r ) + DEFINE r AS TRUE + ) +) +-- Every row should be its own match. +SELECT count(*) FROM s WHERE c > 0; + count +------- + 5000 +(1 row) + +-- Large partition test: 100K rows with A+ B* C{10000,} pattern +-- Tests that int32 count doesn't overflow with large repetitions +WITH data AS ( + SELECT generate_series(0, 100000) AS v +), +result AS ( + SELECT v, + count(*) OVER w AS match_len, + first_value(v) OVER w AS match_first, + last_value(v) OVER w AS match_last + FROM data + WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (A+ B* C{10000,}) + DEFINE + A AS v < 33333, + B AS v >= 33333 AND v < 66666, + C AS v >= 66666 AND v < 99999 + ) +) +-- Should match: A (33333 rows) + B (33333 rows) + C (33333 rows) = 99999 rows +SELECT match_first, match_last, match_len FROM result WHERE match_len > 0; + match_first | match_last | match_len +-------------+------------+----------- + 0 | 99998 | 99999 +(1 row) + +-- JIT PREV/NEXT navigation test: 100K rows with PREV in DEFINE. +-- Exercises EEOP_RPR_NAV_SET/RESTORE JIT code paths (has_rpr_nav reload) +-- at scale. V-shape: price rises then falls, repeated across partition. 
+SET jit = on; +SET jit_above_cost = 0; +WITH data AS ( + SELECT i, abs(50000 - i) AS price + FROM generate_series(1, 100000) i +), +result AS ( + SELECT i, price, + count(*) OVER w AS match_len, + first_value(price) OVER w AS match_first + FROM data + WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (DOWN+ UP+) + DEFINE + DOWN AS price < PREV(price), + UP AS price > PREV(price) + ) +) +SELECT count(*) AS matched_rows, max(match_len) AS longest_match +FROM result WHERE match_len > 0; + matched_rows | longest_match +--------------+--------------- + 1 | 99999 +(1 row) + +RESET jit_above_cost; +RESET jit; +-- JIT compound navigation test +SET jit = on; +SET jit_above_cost = 0; +SELECT count(*) AS matched_rows +FROM ( + SELECT v, count(*) OVER w AS match_len + FROM generate_series(1, 1000) AS t(v) + WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS PREV(FIRST(v), 1) > 0 + ) +) sub WHERE match_len > 0; + matched_rows +-------------- + 1 +(1 row) + +RESET jit_above_cost; +RESET jit; +-- +-- IGNORE NULLS +-- +-- no NULL rows case. 
The result should be identical with "basic test using PREV" +SELECT company, tdate, price, first_value(price) IGNORE NULLS OVER w, + last_value(price) IGNORE NULLS OVER w, + nth_value(tdate, 2) IGNORE NULLS OVER w AS nth_second + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value | nth_second +----------+------------+-------+-------------+------------+------------ + company1 | 07-01-2023 | 100 | 100 | 140 | 07-02-2023 + company1 | 07-02-2023 | 200 | | | + company1 | 07-03-2023 | 150 | | | + company1 | 07-04-2023 | 140 | | | + company1 | 07-05-2023 | 150 | | | + company1 | 07-06-2023 | 90 | 90 | 120 | 07-07-2023 + company1 | 07-07-2023 | 110 | | | + company1 | 07-08-2023 | 130 | | | + company1 | 07-09-2023 | 120 | | | + company1 | 07-10-2023 | 130 | | | + company2 | 07-01-2023 | 50 | 50 | 1400 | 07-02-2023 + company2 | 07-02-2023 | 2000 | | | + company2 | 07-03-2023 | 1500 | | | + company2 | 07-04-2023 | 1400 | | | + company2 | 07-05-2023 | 1500 | | | + company2 | 07-06-2023 | 60 | 60 | 1200 | 07-07-2023 + company2 | 07-07-2023 | 1100 | | | + company2 | 07-08-2023 | 1300 | | | + company2 | 07-09-2023 | 1200 | | | + company2 | 07-10-2023 | 1300 | | | +(20 rows) + +-- nth_value with IGNORE NULLS option wants to find the second row but +-- due to a NULL in the middle, it returns the third row. 
+WITH data AS ( + SELECT * FROM (VALUES + (10, 1), (11, NULL), (12, 3), (13, 4) + ) AS t(gid, id)) + SELECT gid, id, nth_value(id, 2) IGNORE NULLS OVER w AS second_val, + array_agg(id) OVER w + FROM data + WINDOW w AS ( + ORDER BY gid + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS gid < 13 + ); + gid | id | second_val | array_agg +-----+----+------------+------------ + 10 | 1 | 3 | {1,NULL,3} + 11 | | | + 12 | 3 | | + 13 | 4 | | +(4 rows) + +-- nth_value with IGNORE NULLS option wants to find the third row but +-- due to a NULL in the middle, it reaches the end of reduced frame and +-- returns NULL +WITH data AS ( + SELECT * FROM (VALUES + (10, 1), (11, NULL), (12, 3), (13, 4) + ) AS t(gid, id)) + SELECT gid, id, nth_value(id, 3) IGNORE NULLS OVER w AS thrid_val, + array_agg(id) OVER w + FROM data + WINDOW w AS ( + ORDER BY gid + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS gid < 13 + ); + gid | id | thrid_val | array_agg +-----+----+-----------+------------ + 10 | 1 | | {1,NULL,3} + 11 | | | + 12 | 3 | | + 13 | 4 | | +(4 rows) + +-- nth_value beyond reduced frame with IGNORE NULLS +-- Tests ignorenulls_getfuncarginframe early out-of-frame check +SELECT company, tdate, price, + nth_value(price, 5) IGNORE NULLS OVER w AS nth_5_in +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | nth_5_in +----------+------------+-------+---------- + company1 | 07-01-2023 | 100 | + company1 | 07-02-2023 | 200 | + company1 | 07-03-2023 | 150 | + company1 | 07-04-2023 | 140 | + company1 | 07-05-2023 | 150 | + company1 | 07-06-2023 | 90 | + company1 | 07-07-2023 | 110 | + company1 | 07-08-2023 | 130 | + company1 | 
07-09-2023 | 120 | + company1 | 07-10-2023 | 130 | + company2 | 07-01-2023 | 50 | + company2 | 07-02-2023 | 2000 | + company2 | 07-03-2023 | 1500 | + company2 | 07-04-2023 | 1400 | + company2 | 07-05-2023 | 1500 | + company2 | 07-06-2023 | 60 | + company2 | 07-07-2023 | 1100 | + company2 | 07-08-2023 | 1300 | + company2 | 07-09-2023 | 1200 | + company2 | 07-10-2023 | 1300 | +(20 rows) + +-- IGNORE NULLS + first_value where first value in reduced frame is NULL +WITH data AS ( + SELECT * FROM (VALUES + (1, NULL), (2, NULL), (3, 30), (4, 40) + ) AS t(id, val)) +SELECT id, val, + first_value(val) IGNORE NULLS OVER w AS fv_ignull, + count(*) OVER w +FROM data +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS TRUE +); + id | val | fv_ignull | count +----+-----+-----------+------- + 1 | | 30 | 4 + 2 | | | 0 + 3 | 30 | | 0 + 4 | 40 | | 0 +(4 rows) + +-- IGNORE NULLS + all values NULL in reduced frame +WITH data AS ( + SELECT * FROM (VALUES + (1, NULL), (2, NULL), (3, NULL) + ) AS t(id, val)) +SELECT id, val, + first_value(val) IGNORE NULLS OVER w AS fv_ignull, + last_value(val) IGNORE NULLS OVER w AS lv_ignull, + count(*) OVER w +FROM data +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS TRUE +); + id | val | fv_ignull | lv_ignull | count +----+-----+-----------+-----------+------- + 1 | | | | 3 + 2 | | | | 0 + 3 | | | | 0 +(3 rows) + +-- +-- last_value IGNORE NULLS with reduced frame containing all NULLs +-- Exercises ignorenulls_getfuncarginframe SEEK_TAIL out-of-frame path +-- when notnull_relpos >= num_reduced_frame. 
+-- +CREATE TEMP TABLE rpr_nullval (id INT, val INT); +INSERT INTO rpr_nullval VALUES (1, 10), (2, NULL), (3, NULL), (4, 20); +SELECT id, val, + last_value(val) IGNORE NULLS OVER w AS lv_ignull, + count(*) OVER w AS cnt +FROM rpr_nullval +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE + A AS val IS NOT NULL, + B AS val IS NULL +); + id | val | lv_ignull | cnt +----+-----+-----------+----- + 1 | 10 | 10 | 3 + 2 | | | 0 + 3 | | | 0 + 4 | 20 | | 0 +(4 rows) + +-- +-- NULL handling +-- +CREATE TEMP TABLE stock_null (company TEXT, tdate DATE, price INTEGER); +INSERT INTO stock_null VALUES ('c1', '2023-07-01', 100); +INSERT INTO stock_null VALUES ('c1', '2023-07-02', NULL); -- NULL in middle +INSERT INTO stock_null VALUES ('c1', '2023-07-03', 200); +INSERT INTO stock_null VALUES ('c1', '2023-07-04', 150); +SELECT company, tdate, price, count(*) OVER w AS match_count +FROM stock_null +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (START UP DOWN) + DEFINE START AS TRUE, UP AS price > PREV(price), DOWN AS price < +PREV(price) +); + company | tdate | price | match_count +---------+------------+-------+------------- + c1 | 07-01-2023 | 100 | 0 + c1 | 07-02-2023 | | 0 + c1 | 07-03-2023 | 200 | 0 + c1 | 07-04-2023 | 150 | 0 +(4 rows) + +-- Consecutive NULLs: PREV navigates through NULL values +CREATE TEMP TABLE rpr_consec_null (id INT, val INT); +INSERT INTO rpr_consec_null VALUES + (1, 100), (2, NULL), (3, NULL), (4, NULL), (5, 200), (6, 300); +-- PREV(val) IS NULL succeeds for both null_slot (first row) and actual NULL +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_consec_null +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+ C) + DEFINE + A AS val IS NULL, + B AS val IS NULL AND PREV(val) IS NULL, + C AS val IS NOT NULL +); + id | val | 
cnt +----+-----+----- + 1 | 100 | 0 + 2 | | 4 + 3 | | 0 + 4 | | 0 + 5 | 200 | 0 + 6 | 300 | 0 +(6 rows) + +-- NEXT(val) through consecutive NULLs +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_consec_null +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+ C) + DEFINE + A AS val IS NOT NULL, + B AS val IS NULL AND NEXT(val) IS NULL, + C AS val IS NULL AND NEXT(val) IS NOT NULL +); + id | val | cnt +----+-----+----- + 1 | 100 | 4 + 2 | | 0 + 3 | | 0 + 4 | | 0 + 5 | 200 | 0 + 6 | 300 | 0 +(6 rows) + +DROP TABLE rpr_consec_null; +-- ============================================================ +-- Stock Scenario Tests (1632 rows, partitioned regions) +-- ============================================================ +-- Consecutive rising days: find streaks of 7+ days +SELECT * FROM ( + SELECT first_value(rn) OVER w AS start_rn, + last_value(rn) OVER w AS end_rn, + count(*) OVER w AS days + FROM rpr_stock + WINDOW w AS ( + PARTITION BY part_id + ORDER BY rn + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (UP{7,}) + DEFINE UP AS price > PREV(price) + ) +) t WHERE days > 0 ORDER BY start_rn; + start_rn | end_rn | days +----------+--------+------ + 29 | 35 | 7 + 38 | 44 | 7 + 96 | 102 | 7 + 118 | 125 | 8 + 308 | 317 | 10 + 328 | 334 | 7 + 475 | 481 | 7 + 491 | 497 | 7 + 509 | 517 | 9 + 536 | 542 | 7 + 586 | 592 | 7 + 643 | 650 | 8 + 740 | 746 | 7 + 753 | 760 | 8 + 904 | 910 | 7 + 956 | 965 | 10 + 985 | 991 | 7 + 1095 | 1101 | 7 + 1104 | 1110 | 7 + 1181 | 1187 | 7 + 1221 | 1228 | 8 + 1262 | 1268 | 7 + 1272 | 1278 | 7 + 1373 | 1380 | 8 + 1434 | 1440 | 7 + 1485 | 1491 | 7 + 1553 | 1559 | 7 + 1576 | 1582 | 7 + 1624 | 1631 | 8 +(29 rows) + +-- V-shape recovery: 4+ days decline followed by 4+ days rise +SELECT * FROM ( + SELECT first_value(rn) OVER w AS start_rn, + last_value(rn) OVER w AS end_rn, + first_value(price) OVER w AS start_price, + 
last_value(price) OVER w AS end_price, + count(*) OVER w AS days + FROM rpr_stock + WINDOW w AS ( + PARTITION BY part_id + ORDER BY rn + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (DECLINE{4,} RISE{4,}) + DEFINE + DECLINE AS price < PREV(price), + RISE AS price > PREV(price) + ) +) t WHERE days > 0 ORDER BY start_rn; + start_rn | end_rn | start_price | end_price | days +----------+--------+-------------+-----------+------ + 84 | 93 | 301.000 | 309.750 | 10 + 114 | 125 | 394.500 | 418.750 | 12 + 173 | 183 | 260.000 | 282.000 | 11 + 204 | 214 | 179.000 | 171.500 | 11 + 262 | 271 | 277.625 | 275.500 | 10 + 337 | 344 | 308.250 | 308.125 | 8 + 436 | 444 | 56.630 | 60.250 | 9 + 567 | 575 | 126.500 | 127.500 | 9 + 598 | 607 | 112.250 | 116.500 | 10 + 653 | 660 | 129.375 | 128.125 | 8 + 663 | 671 | 125.630 | 130.250 | 9 + 685 | 693 | 120.130 | 124.000 | 9 + 710 | 719 | 141.125 | 144.130 | 10 + 833 | 841 | 106.500 | 111.125 | 9 + 853 | 862 | 105.375 | 107.750 | 10 + 930 | 937 | 92.380 | 96.375 | 8 + 1188 | 1197 | 105.250 | 108.380 | 10 + 1198 | 1206 | 100.125 | 100.562 | 9 + 1250 | 1259 | 131.437 | 130.875 | 10 + 1285 | 1295 | 176.940 | 182.190 | 11 + 1298 | 1307 | 181.000 | 178.562 | 10 + 1310 | 1322 | 186.310 | 212.000 | 13 + 1405 | 1412 | 117.800 | 117.500 | 8 + 1467 | 1474 | 70.400 | 71.830 | 8 + 1494 | 1502 | 79.760 | 86.000 | 9 + 1600 | 1618 | 90.440 | 77.050 | 19 +(26 rows) + +-- W-bottom: decline, bounce, re-decline, recovery +SELECT * FROM ( + SELECT first_value(rn) OVER w AS start_rn, + last_value(rn) OVER w AS end_rn, + first_value(price) OVER w AS start_price, + last_value(price) OVER w AS end_price, + count(*) OVER w AS days + FROM rpr_stock + WINDOW w AS ( + PARTITION BY part_id + ORDER BY rn + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (DECLINE{3,} BOUNCE{3,} DIP{3,} RECOVER{3,}) + DEFINE + DECLINE AS price < PREV(price), + BOUNCE AS price > PREV(price), + DIP AS 
price < PREV(price), + RECOVER AS price > PREV(price) + ) +) t WHERE days > 0 ORDER BY start_rn; + start_rn | end_rn | start_price | end_price | days +----------+--------+-------------+-----------+------ + 136 | 153 | 444.000 | 434.250 | 18 + 456 | 469 | 64.500 | 65.125 | 14 + 520 | 534 | 115.250 | 115.750 | 15 + 610 | 623 | 107.125 | 109.000 | 14 + 791 | 802 | 113.500 | 118.250 | 12 + 942 | 953 | 91.250 | 89.875 | 12 + 1188 | 1206 | 105.250 | 100.562 | 19 + 1560 | 1574 | 87.420 | 90.000 | 15 +(8 rows) + +-- Volume surge streak: 6+ consecutive days of increasing volume +SELECT * FROM ( + SELECT first_value(rn) OVER w AS start_rn, + last_value(rn) OVER w AS end_rn, + first_value(volume) OVER w AS start_vol, + last_value(volume) OVER w AS end_vol, + count(*) OVER w AS days + FROM rpr_stock + WINDOW w AS ( + PARTITION BY part_id + ORDER BY rn + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (INIT SURGE{5,}) + DEFINE + SURGE AS volume > PREV(volume) + ) +) t WHERE days > 0 ORDER BY start_rn; + start_rn | end_rn | start_vol | end_vol | days +----------+--------+-----------+----------+------ + 186 | 191 | 25100 | 35300 | 6 + 291 | 296 | 52200 | 188300 | 6 + 408 | 413 | 163300 | 610300 | 6 + 439 | 444 | 438400 | 1089200 | 6 + 500 | 506 | 373700 | 1114200 | 7 + 551 | 558 | 691100 | 2097500 | 8 + 635 | 640 | 418300 | 1388100 | 6 + 783 | 788 | 769000 | 1564900 | 6 + 824 | 830 | 682200 | 2418700 | 7 + 968 | 974 | 993100 | 2341200 | 7 + 1072 | 1077 | 1257200 | 2327700 | 6 + 1078 | 1084 | 1748300 | 5514300 | 7 + 1093 | 1098 | 2664400 | 13145900 | 6 + 1334 | 1339 | 3220900 | 10552600 | 6 + 1524 | 1530 | 3562400 | 7012100 | 7 + 1533 | 1538 | 3548500 | 8560800 | 6 + 1575 | 1580 | 4226500 | 6952100 | 6 +(17 rows) + +-- Volatility squeeze: consecutive narrowing of daily price range +SELECT * FROM ( + SELECT first_value(rn) OVER w AS start_rn, + last_value(rn) OVER w AS end_rn, + first_value(high - low) OVER w AS start_range, + 
last_value(high - low) OVER w AS end_range, + count(*) OVER w AS days + FROM rpr_stock + WINDOW w AS ( + PARTITION BY part_id + ORDER BY rn + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (INIT NARROW{5,}) + DEFINE + NARROW AS (high - low) < PREV(high) - PREV(low) + ) +) t WHERE days > 0 ORDER BY start_rn; + start_rn | end_rn | start_range | end_range | days +----------+--------+-------------+-----------+------ + 128 | 133 | 11.000 | 3.000 | 6 + 170 | 175 | 10.250 | 2.750 | 6 + 194 | 201 | 10.000 | 1.625 | 8 + 283 | 288 | 7.000 | 2.375 | 6 + 320 | 325 | 4.750 | 2.500 | 6 + 578 | 583 | 2.750 | 1.125 | 6 + 725 | 731 | 3.370 | 1.125 | 7 + 775 | 780 | 4.500 | 0.875 | 6 + 913 | 918 | 2.870 | 1.250 | 6 + 1130 | 1135 | 3.000 | 1.125 | 6 + 1348 | 1353 | 6.620 | 1.562 | 6 +(11 rows) + +-- Gap up: open significantly higher than previous close (5%+) +SELECT * FROM ( + SELECT first_value(rn) OVER w AS gap_rn, + first_value(price) OVER w AS prev_close, + last_value(open) OVER w AS gap_open, + count(*) OVER w AS cnt + FROM rpr_stock + WINDOW w AS ( + PARTITION BY part_id + ORDER BY rn + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (PREV_DAY GAP_UP) + DEFINE + GAP_UP AS open > PREV(price) * 1.05 + ) +) t WHERE cnt > 0 ORDER BY gap_rn; + gap_rn | prev_close | gap_open | cnt +--------+------------+----------+----- + 986 | 48.875 | 52.875 | 2 + 994 | 42.375 | 44.880 | 2 + 1029 | 52.250 | 55.000 | 2 + 1039 | 55.880 | 59.000 | 2 + 1177 | 142.380 | 150.500 | 2 + 1238 | 122.000 | 128.250 | 2 + 1318 | 171.875 | 197.250 | 2 + 1383 | 103.500 | 112.000 | 2 + 1392 | 96.687 | 104.375 | 2 + 1401 | 106.500 | 112.000 | 2 + 1464 | 66.400 | 70.370 | 2 + 1477 | 57.580 | 62.000 | 2 + 1479 | 63.420 | 67.750 | 2 + 1481 | 64.900 | 72.800 | 2 + 1517 | 90.310 | 95.070 | 2 +(15 rows) + +-- Price-volume divergence: price rising while volume declining (bearish signal) +SELECT * FROM ( + SELECT first_value(rn) OVER w 
AS start_rn, + last_value(rn) OVER w AS end_rn, + first_value(price) OVER w AS start_price, + last_value(price) OVER w AS end_price, + count(*) OVER w AS days + FROM rpr_stock + WINDOW w AS ( + PARTITION BY part_id + ORDER BY rn + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (INIT DIVERGE{3,}) + DEFINE + DIVERGE AS price > PREV(price) AND volume < PREV(volume) + ) +) t WHERE days > 0 ORDER BY start_rn; + start_rn | end_rn | start_price | end_price | days +----------+--------+-------------+-----------+------ + 10 | 13 | 338.500 | 349.750 | 4 + 47 | 50 | 352.000 | 358.000 | 4 + 53 | 56 | 314.000 | 319.000 | 4 + 63 | 66 | 294.500 | 301.750 | 4 + 99 | 102 | 401.000 | 404.000 | 4 + 177 | 180 | 248.500 | 261.750 | 4 + 209 | 212 | 152.000 | 166.500 | 4 + 237 | 240 | 207.500 | 215.500 | 4 + 274 | 277 | 259.500 | 264.500 | 4 + 347 | 350 | 306.750 | 312.000 | 4 + 531 | 534 | 111.875 | 115.750 | 4 + 545 | 548 | 118.000 | 121.000 | 4 + 561 | 564 | 131.130 | 134.250 | 4 + 586 | 589 | 110.250 | 112.000 | 4 + 645 | 648 | 129.630 | 133.250 | 4 + 656 | 660 | 124.500 | 128.125 | 5 + 679 | 682 | 128.000 | 129.750 | 4 + 734 | 737 | 121.380 | 123.750 | 4 + 763 | 766 | 150.750 | 156.500 | 4 + 844 | 847 | 105.500 | 107.880 | 4 + 921 | 924 | 97.250 | 101.250 | 4 + 956 | 959 | 84.875 | 87.500 | 4 + 960 | 963 | 88.250 | 89.500 | 4 + 987 | 990 | 49.000 | 51.500 | 4 + 1023 | 1026 | 53.250 | 54.625 | 4 + 1033 | 1036 | 57.125 | 58.380 | 4 + 1060 | 1063 | 89.000 | 93.625 | 4 + 1066 | 1069 | 89.130 | 93.000 | 4 + 1087 | 1090 | 89.375 | 91.750 | 4 + 1165 | 1168 | 137.625 | 146.875 | 4 + 1202 | 1205 | 96.500 | 98.750 | 4 + 1209 | 1212 | 96.125 | 100.250 | 4 + 1220 | 1223 | 106.130 | 112.000 | 4 + 1231 | 1234 | 117.062 | 119.625 | 4 + 1244 | 1247 | 122.500 | 130.000 | 4 + 1304 | 1307 | 169.500 | 178.562 | 4 + 1342 | 1345 | 122.000 | 124.000 | 4 + 1417 | 1420 | 101.960 | 108.530 | 4 + 1433 | 1436 | 90.000 | 93.770 | 4 + 1437 | 1440 | 96.950 | 98.500 | 4 
+ 1443 | 1446 | 120.250 | 122.200 | 4 + 1449 | 1452 | 74.650 | 77.140 | 4 + 1455 | 1458 | 67.600 | 73.500 | 4 + 1497 | 1500 | 76.250 | 81.650 | 4 + 1505 | 1508 | 75.860 | 79.510 | 4 + 1511 | 1514 | 79.750 | 81.020 | 4 + 1518 | 1521 | 94.020 | 97.700 | 4 + 1541 | 1544 | 83.650 | 85.250 | 4 + 1547 | 1550 | 82.210 | 84.040 | 4 + 1585 | 1588 | 92.700 | 93.570 | 4 + 1594 | 1597 | 91.600 | 92.410 | 4 + 1613 | 1616 | 72.010 | 74.610 | 4 +(52 rows) + +-- Consolidation then breakout: sideways movement followed by sharp rise +SELECT * FROM ( + SELECT first_value(rn) OVER w AS start_rn, + last_value(rn) OVER w AS end_rn, + first_value(price) OVER w AS start_price, + last_value(price) OVER w AS end_price, + count(*) OVER w AS days + FROM rpr_stock + WINDOW w AS ( + PARTITION BY part_id + ORDER BY rn + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (FLAT{5,} BREAKOUT) + DEFINE + FLAT AS price BETWEEN PREV(price) * 0.98 AND PREV(price) * 1.02, + BREAKOUT AS price > PREV(price) * 1.05 + ) +) t WHERE days > 0 ORDER BY start_rn; + start_rn | end_rn | start_price | end_price | days +----------+--------+-------------+-----------+------ + 69 | 81 | 296.750 | 314.000 | 13 + 214 | 225 | 171.500 | 164.125 | 12 + 371 | 395 | 71.630 | 71.250 | 25 + 416 | 424 | 54.250 | 53.875 | 9 + 484 | 494 | 75.500 | 79.000 | 11 + 865 | 892 | 112.500 | 115.750 | 28 + 1007 | 1020 | 59.500 | 58.630 | 14 + 1113 | 1118 | 110.750 | 117.625 | 6 + 1146 | 1152 | 133.750 | 145.000 | 7 + 1171 | 1178 | 137.880 | 153.625 | 8 + 1350 | 1357 | 106.125 | 112.000 | 8 + 1360 | 1370 | 109.125 | 115.625 | 11 +(12 rows) + +-- Dead cat bounce: decline followed by weak recovery (<1% per day) +SELECT * FROM ( + SELECT first_value(rn) OVER w AS start_rn, + last_value(rn) OVER w AS end_rn, + first_value(price) OVER w AS start_price, + last_value(price) OVER w AS end_price, + count(*) OVER w AS days + FROM rpr_stock + WINDOW w AS ( + PARTITION BY part_id + ORDER BY rn + ROWS BETWEEN 
CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (DECLINE{4,} BOUNCE{3,}) + DEFINE + DECLINE AS price < PREV(price), + BOUNCE AS price > PREV(price) AND price < PREV(price) * 1.01 + ) +) t WHERE days > 0 ORDER BY start_rn; + start_rn | end_rn | start_price | end_price | days +----------+--------+-------------+-----------+------ + 59 | 66 | 315.000 | 301.750 | 8 + 262 | 271 | 277.625 | 275.500 | 10 + 280 | 287 | 272.000 | 261.250 | 8 + 361 | 368 | 75.375 | 74.130 | 8 + 427 | 433 | 65.130 | 65.500 | 7 + 447 | 453 | 55.500 | 54.625 | 7 + 653 | 659 | 129.375 | 126.625 | 7 + 674 | 682 | 132.375 | 129.750 | 9 + 833 | 839 | 106.500 | 107.125 | 7 + 1423 | 1430 | 108.180 | 106.200 | 8 + 1591 | 1597 | 92.920 | 92.410 | 7 +(11 rows) + +-- Uptrend: 7+ consecutive days of higher highs AND higher lows +SELECT * FROM ( + SELECT first_value(rn) OVER w AS start_rn, + last_value(rn) OVER w AS end_rn, + first_value(price) OVER w AS start_price, + last_value(price) OVER w AS end_price, + count(*) OVER w AS days + FROM rpr_stock + WINDOW w AS ( + PARTITION BY part_id + ORDER BY rn + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (UPTREND{7,}) + DEFINE + UPTREND AS high > PREV(high) AND low > PREV(low) + ) +) t WHERE days > 0 ORDER BY start_rn; + start_rn | end_rn | start_price | end_price | days +----------+--------+-------------+-----------+------ + 156 | 162 | 317.000 | 329.500 | 7 + 299 | 305 | 266.000 | 273.500 | 7 + 696 | 702 | 144.630 | 152.500 | 7 + 741 | 747 | 155.250 | 166.750 | 7 + 895 | 901 | 119.250 | 126.750 | 7 + 1121 | 1127 | 103.630 | 108.875 | 7 + 1211 | 1217 | 99.130 | 102.875 | 7 + 1271 | 1278 | 164.375 | 189.250 | 8 + 1621 | 1628 | 84.000 | 89.820 | 8 +(9 rows) + +-- Panic and snap-back: 3%+ daily drops followed by 2%+ rebound +SELECT * FROM ( + SELECT first_value(rn) OVER w AS start_rn, + last_value(rn) OVER w AS end_rn, + first_value(price) OVER w AS start_price, + last_value(price) OVER w 
AS end_price, + count(*) OVER w AS days + FROM rpr_stock + WINDOW w AS ( + PARTITION BY part_id + ORDER BY rn + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (PANIC{2,} SNAP) + DEFINE + PANIC AS price < PREV(price) * 0.97, + SNAP AS price > PREV(price) * 1.02 + ) +) t WHERE days > 0 ORDER BY start_rn; + start_rn | end_rn | start_price | end_price | days +----------+--------+-------------+-----------+------ + 24 | 26 | 230.625 | 229.875 | 3 + 165 | 167 | 272.000 | 270.750 | 3 + 169 | 171 | 260.000 | 259.500 | 3 + 769 | 772 | 140.125 | 115.000 | 4 + 977 | 979 | 56.130 | 53.000 | 3 + 980 | 982 | 51.375 | 51.750 | 3 + 1387 | 1389 | 86.000 | 89.000 | 3 + 1396 | 1398 | 99.290 | 98.390 | 3 +(8 rows) + +-- Volume climax reversal: uptrend, volume spike (1.5x), then decline +SELECT * FROM ( + SELECT first_value(rn) OVER w AS start_rn, + last_value(rn) OVER w AS end_rn, + first_value(price) OVER w AS start_price, + last_value(price) OVER w AS end_price, + count(*) OVER w AS days + FROM rpr_stock + WINDOW w AS ( + PARTITION BY part_id + ORDER BY rn + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (RALLY{3,} CLIMAX SELLOFF{2,}) + DEFINE + RALLY AS price > PREV(price), + CLIMAX AS volume > PREV(volume) * 1.5, + SELLOFF AS price < PREV(price) + ) +) t WHERE days > 0 ORDER BY start_rn; + start_rn | end_rn | start_price | end_price | days +----------+--------+-------------+-----------+------ + 2 | 7 | 368.250 | 367.750 | 6 + 16 | 21 | 349.250 | 344.500 | 6 + 105 | 111 | 388.500 | 394.500 | 7 + 228 | 234 | 164.250 | 163.250 | 7 + 243 | 248 | 223.250 | 228.500 | 6 + 251 | 259 | 251.000 | 253.500 | 9 + 352 | 358 | 310.500 | 309.000 | 7 + 398 | 405 | 65.380 | 65.000 | 8 + 466 | 472 | 64.250 | 63.875 | 7 + 586 | 595 | 110.250 | 112.250 | 10 + 626 | 632 | 125.750 | 123.250 | 7 + 700 | 707 | 150.250 | 152.750 | 8 + 714 | 722 | 136.125 | 137.000 | 9 + 740 | 750 | 154.250 | 163.000 | 11 + 805 | 811 | 
116.875 | 116.000 | 7 + 814 | 821 | 99.500 | 97.130 | 8 + 850 | 857 | 107.880 | 99.250 | 8 + 922 | 927 | 99.750 | 99.875 | 6 + 934 | 939 | 92.375 | 92.880 | 6 + 998 | 1004 | 52.000 | 53.130 | 7 + 1043 | 1049 | 73.125 | 73.000 | 7 + 1052 | 1057 | 72.130 | 74.250 | 6 + 1138 | 1143 | 127.750 | 125.625 | 6 + 1155 | 1162 | 158.000 | 155.625 | 8 + 1181 | 1191 | 99.130 | 100.062 | 11 + 1192 | 1202 | 100.130 | 96.500 | 11 + 1236 | 1241 | 118.380 | 123.880 | 6 + 1272 | 1282 | 166.062 | 183.000 | 11 + 1325 | 1331 | 217.250 | 237.500 | 7 + 1409 | 1414 | 112.890 | 116.100 | 6 + 1456 | 1461 | 68.580 | 68.760 | 6 +(31 rows) + diff --git a/src/test/regress/expected/rpr_base.out b/src/test/regress/expected/rpr_base.out new file mode 100644 index 00000000000..a63211ff364 --- /dev/null +++ b/src/test/regress/expected/rpr_base.out @@ -0,0 +1,6589 @@ +-- ============================================================ +-- RPR Base Tests +-- Tests for Row Pattern Recognition (ISO/IEC 19075-5:2016) +-- ============================================================ +-- +-- Parser Layer: +-- Keyword Usage Tests +-- DEFINE Clause Tests +-- FRAME Options Tests +-- PARTITION BY + FRAME Tests +-- PATTERN Syntax Tests +-- Quantifiers Tests +-- Navigation Functions Tests +-- SKIP TO / INITIAL Tests +-- Serialization/Deserialization Tests (objects kept for pg_upgrade/pg_dump) +-- Error Cases Tests +-- Window Deduplication Tests +-- +-- Planner Layer: +-- Pattern Optimization Tests +-- Absorption Flag Display Tests +-- Absorption Analysis Tests +-- Edge Case Tests +-- Optimization Fallback Tests +-- Planner Integration Tests +-- Subquery and CTE Tests +-- JOIN Tests +-- Complex Expression Tests +-- Set Operations Tests +-- Sorting and Grouping Tests +-- SQL Function Inlining Tests +-- Stress Tests +-- Error Limit Tests +-- +-- Contributed Tests: +-- Jacob's Patterns +-- Pathological Patterns +-- ============================================================ +SET client_min_messages = WARNING; +-- 
============================================================ +-- Keyword Usage Tests +-- ============================================================ +-- RPR keywords as column names +-- Keywords: define, initial, past, pattern, seek +CREATE TABLE rpr_keywords ( + id INT, + define INT, -- DEFINE keyword + initial INT, -- INITIAL keyword + past INT, -- PAST keyword + pattern INT, -- PATTERN keyword + seek INT, -- SEEK keyword + skip INT -- SKIP keyword (pre-existing) +); +INSERT INTO rpr_keywords VALUES (1, 10, 20, 30, 40, 50, 60); +SELECT id, define, initial, past, pattern, seek, skip +FROM rpr_keywords +ORDER BY id; + id | define | initial | past | pattern | seek | skip +----+--------+---------+------+---------+------+------ + 1 | 10 | 20 | 30 | 40 | 50 | 60 +(1 row) + +DROP TABLE rpr_keywords; +-- ============================================================ +-- DEFINE Clause Tests +-- ============================================================ +-- Simple column references +CREATE TABLE stock_price ( + dt DATE, + symbol TEXT, + price NUMERIC, + volume INT +); +INSERT INTO stock_price VALUES + ('2024-01-01', 'AAPL', 150, 1000), + ('2024-01-02', 'AAPL', 155, 1200), + ('2024-01-03', 'AAPL', 152, 900), + ('2024-01-04', 'AAPL', 160, 1500), + ('2024-01-05', 'AAPL', 158, 1100); +-- Simple column reference +SELECT dt, price, COUNT(*) OVER w as cnt +FROM stock_price +WINDOW w AS ( + PARTITION BY symbol + ORDER BY dt + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (UP+) + DEFINE UP AS price > 150 +) +ORDER BY dt; + dt | price | cnt +------------+-------+----- + 01-01-2024 | 150 | 0 + 01-02-2024 | 155 | 4 + 01-03-2024 | 152 | 0 + 01-04-2024 | 160 | 0 + 01-05-2024 | 158 | 0 +(5 rows) + +-- Multiple column references +SELECT dt, price, volume, COUNT(*) OVER w as cnt +FROM stock_price +WINDOW w AS ( + PARTITION BY symbol + ORDER BY dt + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (GOOD+) + DEFINE GOOD AS price > 150 AND volume > 1000 +) +ORDER BY 
dt; + dt | price | volume | cnt +------------+-------+--------+----- + 01-01-2024 | 150 | 1000 | 0 + 01-02-2024 | 155 | 1200 | 1 + 01-03-2024 | 152 | 900 | 0 + 01-04-2024 | 160 | 1500 | 2 + 01-05-2024 | 158 | 1100 | 0 +(5 rows) + +-- Expression in DEFINE +SELECT dt, price, COUNT(*) OVER w as cnt +FROM stock_price +WINDOW w AS ( + PARTITION BY symbol + ORDER BY dt + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (HIGH+) + DEFINE HIGH AS price * 1.1 > 165 +) +ORDER BY dt; + dt | price | cnt +------------+-------+----- + 01-01-2024 | 150 | 0 + 01-02-2024 | 155 | 4 + 01-03-2024 | 152 | 0 + 01-04-2024 | 160 | 0 + 01-05-2024 | 158 | 0 +(5 rows) + +-- Arithmetic and functions +SELECT dt, price, volume, COUNT(*) OVER w as cnt +FROM stock_price +WINDOW w AS ( + PARTITION BY symbol + ORDER BY dt + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (CALC+) + DEFINE CALC AS (price + volume / 100) > 160 +) +ORDER BY dt; + dt | price | volume | cnt +------------+-------+--------+----- + 01-01-2024 | 150 | 1000 | 0 + 01-02-2024 | 155 | 1200 | 4 + 01-03-2024 | 152 | 900 | 0 + 01-04-2024 | 160 | 1500 | 0 + 01-05-2024 | 158 | 1100 | 0 +(5 rows) + +DROP TABLE stock_price; +-- Auto-generated DEFINE +CREATE TABLE rpr_auto (id INT, val INT); +INSERT INTO rpr_auto VALUES (1, 10), (2, 20), (3, 30), (4, 15); +-- One variable undefined (B auto-generated as "B IS TRUE") +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_auto +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ B*) + DEFINE A AS val > 15 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 3 + 3 | 30 | 0 + 4 | 15 | 0 +(4 rows) + +-- Multiple undefined variables +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_auto +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B C) + DEFINE A AS val > 0 + -- B and C auto-generated as "B IS TRUE", "C IS TRUE" +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 
10 | 3 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 15 | 0 +(4 rows) + +-- All variables defined explicitly +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_auto +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (X Y Z) + DEFINE + X AS val > 10, + Y AS val > 20, + Z AS val < 20 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 3 + 3 | 30 | 0 + 4 | 15 | 0 +(4 rows) + +DROP TABLE rpr_auto; +-- Duplicate variable names +CREATE TABLE rpr_dup (id INT); +INSERT INTO rpr_dup VALUES (1), (2); +-- Duplicate DEFINE entries +SELECT COUNT(*) OVER w +FROM rpr_dup +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS id > 0, A AS id < 10 +); +ERROR: DEFINE variable "a" appears more than once +LINE 7: DEFINE A AS id > 0, A AS id < 10 + ^ +-- Expected: ERROR: DEFINE variable "a" appears more than once +DROP TABLE rpr_dup; +-- Boolean coercion +CREATE TABLE rpr_bool (id INT, flag BOOLEAN); +INSERT INTO rpr_bool VALUES (1, true), (2, false); +-- Non-boolean expression +SELECT COUNT(*) OVER w +FROM rpr_bool +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS id +); +ERROR: argument of DEFINE must be type boolean, not type integer +LINE 7: DEFINE A AS id + ^ +-- Expected: ERROR: argument of DEFINE must be type boolean, not type integer +-- Boolean column reference +SELECT id, flag, COUNT(*) OVER w as cnt +FROM rpr_bool +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (T+) + DEFINE T AS flag +) +ORDER BY id; + id | flag | cnt +----+------+----- + 1 | t | 1 + 2 | f | 0 +(2 rows) + +-- NULL::boolean +SELECT id, COUNT(*) OVER w as cnt +FROM rpr_bool +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (N+) + DEFINE N AS NULL::boolean +) +ORDER BY id; + id | cnt +----+----- + 1 | 0 + 2 | 0 +(2 rows) + +-- Implicit cast to boolean 
via custom type +CREATE TYPE truthyint AS (v int); +CREATE FUNCTION truthyint_to_bool(truthyint) RETURNS boolean AS $$ + SELECT ($1).v <> 0; +$$ LANGUAGE SQL IMMUTABLE STRICT; +CREATE CAST (truthyint AS boolean) + WITH FUNCTION truthyint_to_bool(truthyint) + AS ASSIGNMENT; +CREATE TABLE rpr_coerce (id int, val truthyint); +INSERT INTO rpr_coerce VALUES (1, ROW(1)), (2, ROW(0)), (3, ROW(5)), (4, ROW(0)); +SELECT id, val, cnt +FROM (SELECT id, val, + COUNT(*) OVER w AS cnt + FROM rpr_coerce + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val + ) +) s ORDER BY id; + id | val | cnt +----+-----+----- + 1 | (1) | 1 + 2 | (0) | 0 + 3 | (5) | 1 + 4 | (0) | 0 +(4 rows) + +DROP TABLE rpr_coerce; +DROP CAST (truthyint AS boolean); +DROP FUNCTION truthyint_to_bool(truthyint); +DROP TYPE truthyint; +DROP TABLE rpr_bool; +-- Complex expressions +CREATE TABLE rpr_complex (id INT, val1 INT, val2 INT); +INSERT INTO rpr_complex VALUES (1, 10, 20), (2, 15, 25), (3, 20, 30); +-- CASE expression +SELECT id, val1, val2, COUNT(*) OVER w as cnt +FROM rpr_complex +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (C+) + DEFINE C AS CASE WHEN val1 > 10 THEN val2 > 20 ELSE false END +) +ORDER BY id; + id | val1 | val2 | cnt +----+------+------+----- + 1 | 10 | 20 | 0 + 2 | 15 | 25 | 2 + 3 | 20 | 30 | 0 +(3 rows) + +DROP TABLE rpr_complex; +-- DEFINE variable not used in PATTERN (rejected with an error) +CREATE TABLE rpr_unused (id INT); +INSERT INTO rpr_unused VALUES (1), (2); +-- Extra DEFINE variable +SELECT id, COUNT(*) OVER w as cnt +FROM rpr_unused +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS id > 0, B AS id > 5 -- B not in pattern +) +ORDER BY id; +ERROR: DEFINE variable "b" is not used in PATTERN +LINE 7: DEFINE A AS id > 0, B AS id > 5 -- B not in pattern + ^ +DROP TABLE rpr_unused; +-- 
============================================================ +-- FRAME Options Tests +-- ============================================================ +CREATE TABLE rpr_frame (id INT, val INT); +INSERT INTO rpr_frame VALUES + (1, 10), (2, 10), (3, 10), -- Same val: 10 + (4, 20), (5, 20), -- Same val: 20 + (6, 30); +-- Valid frame options +-- ROWS: counts physical rows (1 FOLLOWING = next 1 physical row) +-- Expected result: Each row can see 1 physical row ahead +-- id=1,2,3 (val=10): can see next row -> cnt=2 +-- id=4,5 (val=20): can see next row -> cnt=2 +-- id=6 (val=30): no next row -> cnt=1 +-- Result: [2,2,2,2,2,1] +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_frame +WINDOW w AS ( + ORDER BY val + ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B?) + DEFINE A AS val >= 0, B AS val >= 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 10 | 2 + 3 | 10 | 2 + 4 | 20 | 2 + 5 | 20 | 2 + 6 | 30 | 1 +(6 rows) + +-- Invalid frame start positions +-- Not starting at CURRENT ROW +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: FRAME must start at CURRENT ROW when using row pattern recognition +LINE 5: ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ^ +DETAIL: Current frame starts with UNBOUNDED PRECEDING. +HINT: Use: ROWS BETWEEN CURRENT ROW AND ... +-- Expected: ERROR: FRAME must start at CURRENT ROW when using row pattern recognition +-- EXCLUDE options +-- EXCLUDE not permitted +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + EXCLUDE CURRENT ROW + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: cannot use EXCLUDE options with row pattern recognition +LINE 6: EXCLUDE CURRENT ROW + ^ +DETAIL: Frame definition includes EXCLUDE CURRENT ROW. +HINT: Remove the EXCLUDE clause from the window definition. 
+-- Expected: ERROR: cannot use EXCLUDE options with row pattern recognition +-- EXCLUDE GROUP not permitted +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + EXCLUDE GROUP + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: cannot use EXCLUDE options with row pattern recognition +LINE 6: EXCLUDE GROUP + ^ +DETAIL: Frame definition includes EXCLUDE GROUP. +HINT: Remove the EXCLUDE clause from the window definition. +-- Expected: ERROR: cannot use EXCLUDE options with row pattern recognition +-- EXCLUDE TIES not permitted +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + EXCLUDE TIES + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: cannot use EXCLUDE options with row pattern recognition +LINE 6: EXCLUDE TIES + ^ +DETAIL: Frame definition includes EXCLUDE TIES. +HINT: Remove the EXCLUDE clause from the window definition. +-- Expected: ERROR: cannot use EXCLUDE options with row pattern recognition +-- RANGE frame not starting at CURRENT ROW +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: cannot use FRAME option RANGE with row pattern recognition +LINE 5: RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWIN... + ^ +HINT: Use ROWS instead. +-- Expected: ERROR: cannot use FRAME option RANGE with row pattern recognition +-- GROUPS frame not starting at CURRENT ROW +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + GROUPS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: cannot use FRAME option GROUPS with row pattern recognition +LINE 5: GROUPS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWI... + ^ +HINT: Use ROWS instead. 
+-- Expected: ERROR: cannot use FRAME option GROUPS with row pattern recognition +-- Starting with N PRECEDING +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: FRAME must start at CURRENT ROW when using row pattern recognition +LINE 5: ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING + ^ +DETAIL: Current frame starts with offset PRECEDING. +HINT: Use: ROWS BETWEEN CURRENT ROW AND ... +-- Expected: ERROR: FRAME must start at CURRENT ROW when using row pattern recognition +-- Starting with N FOLLOWING +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: FRAME must start at CURRENT ROW when using row pattern recognition +LINE 5: ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING + ^ +DETAIL: Current frame starts with offset FOLLOWING. +HINT: Use: ROWS BETWEEN CURRENT ROW AND ... 
+-- Expected: ERROR: FRAME must start at CURRENT ROW when using row pattern recognition +-- Frame end bound edge cases +-- End before start: CURRENT ROW AND 1 PRECEDING +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 1 PRECEDING + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: frame starting from current row cannot have preceding rows +LINE 5: ROWS BETWEEN CURRENT ROW AND 1 PRECEDING + ^ +-- Expected: ERROR: frame starting from current row cannot have preceding rows +-- End before start: CURRENT ROW AND UNBOUNDED PRECEDING +SELECT COUNT(*) OVER w +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED PRECEDING + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: frame end cannot be UNBOUNDED PRECEDING +LINE 5: ROWS BETWEEN CURRENT ROW AND UNBOUNDED PRECEDING + ^ +-- Expected: ERROR: frame end cannot be UNBOUNDED PRECEDING +-- Single row frame: CURRENT ROW AND CURRENT ROW +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND CURRENT ROW + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 1 + 2 | 10 | 1 + 3 | 10 | 1 + 4 | 20 | 1 + 5 | 20 | 1 + 6 | 30 | 1 +(6 rows) + +-- Zero offset: CURRENT ROW AND 0 FOLLOWING (equivalent to CURRENT ROW) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 0 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 1 + 2 | 10 | 1 + 3 | 10 | 1 + 4 | 20 | 1 + 5 | 20 | 1 + 6 | 30 | 1 +(6 rows) + +-- Large offset: CURRENT ROW AND 1000 FOLLOWING +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 1000 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt 
+----+-----+----- + 1 | 10 | 6 + 2 | 10 | 5 + 3 | 10 | 4 + 4 | 20 | 3 + 5 | 20 | 2 + 6 | 30 | 1 +(6 rows) + +-- Maximum offset: CURRENT ROW AND 2147483646 FOLLOWING (INT_MAX - 1) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2147483646 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 6 + 2 | 10 | 5 + 3 | 10 | 4 + 4 | 20 | 3 + 5 | 20 | 2 + 6 | 30 | 1 +(6 rows) + +-- RANGE frame with RPR (not permitted) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_frame +WINDOW w AS ( + ORDER BY val + RANGE BETWEEN CURRENT ROW AND 10 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B?) + DEFINE A AS val >= 0, B AS val >= 0 +) +ORDER BY id; +ERROR: cannot use FRAME option RANGE with row pattern recognition +LINE 5: RANGE BETWEEN CURRENT ROW AND 10 FOLLOWING + ^ +HINT: Use ROWS instead. +-- Expected: ERROR: cannot use FRAME option RANGE with row pattern recognition +-- GROUPS frame with RPR (not permitted) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_frame +WINDOW w AS ( + ORDER BY val + GROUPS BETWEEN CURRENT ROW AND 1 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B?) + DEFINE A AS val >= 0, B AS val >= 0 +) +ORDER BY id; +ERROR: cannot use FRAME option GROUPS with row pattern recognition +LINE 5: GROUPS BETWEEN CURRENT ROW AND 1 FOLLOWING + ^ +HINT: Use ROWS instead. 
+-- Expected: ERROR: cannot use FRAME option GROUPS with row pattern recognition +DROP TABLE rpr_frame; +-- ============================================================ +-- PARTITION BY + FRAME Tests +-- ============================================================ +-- Test PARTITION BY with RPR to ensure proper partitioning behavior +CREATE TABLE rpr_partition (id INT, grp INT, val INT); +INSERT INTO rpr_partition VALUES + (1, 1, 10), (2, 1, 20), (3, 1, 30), + (4, 2, 15), (5, 2, 25), (6, 2, 35); +-- PARTITION BY with ROWS frame +SELECT id, grp, val, COUNT(*) OVER w as cnt +FROM rpr_partition +WINDOW w AS ( + PARTITION BY grp + ORDER BY val + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B+) + DEFINE A AS val >= 10, B AS val > 15 +) +ORDER BY id; + id | grp | val | cnt +----+-----+-----+----- + 1 | 1 | 10 | 3 + 2 | 1 | 20 | 2 + 3 | 1 | 30 | 0 + 4 | 2 | 15 | 3 + 5 | 2 | 25 | 2 + 6 | 2 | 35 | 0 +(6 rows) + +-- Expected: Pattern matching should reset for each partition +-- PARTITION BY with RANGE frame +SELECT id, grp, val, COUNT(*) OVER w as cnt +FROM rpr_partition +WINDOW w AS ( + PARTITION BY grp + ORDER BY val + RANGE BETWEEN CURRENT ROW AND 10 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B?) + DEFINE A AS val >= 10, B AS val >= 20 +) +ORDER BY id; +ERROR: cannot use FRAME option RANGE with row pattern recognition +LINE 6: RANGE BETWEEN CURRENT ROW AND 10 FOLLOWING + ^ +HINT: Use ROWS instead. 
+-- Expected: ERROR: cannot use FRAME option RANGE with row pattern recognition +DROP TABLE rpr_partition; +-- ============================================================ +-- PATTERN Syntax Tests +-- ============================================================ +CREATE TABLE rpr_pattern (id INT, val INT); +INSERT INTO rpr_pattern VALUES + (1, 5), (2, 10), (3, 15), (4, 20), (5, 25), + (6, 30), (7, 35), (8, 40), (9, 45), (10, 50); +-- Alternation (|) +-- Multiple alternatives +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ | B+ | C+) + DEFINE A AS val > 35, B AS val BETWEEN 15 AND 35, C AS val < 15 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 5 | 2 + 2 | 10 | 0 + 3 | 15 | 5 + 4 | 20 | 0 + 5 | 25 | 0 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 40 | 3 + 9 | 45 | 0 + 10 | 50 | 0 +(10 rows) + +-- Grouping +-- Nested grouping with quantifier +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B) C)+) + DEFINE A AS val > 10, B AS val > 20, C AS val > 30 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 5 | 0 + 2 | 10 | 0 + 3 | 15 | 0 + 4 | 20 | 0 + 5 | 25 | 6 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 40 | 0 + 9 | 45 | 0 + 10 | 50 | 0 +(10 rows) + +-- Sequence +-- Multi-element sequence +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B C D E) + DEFINE + A AS val < 15, + B AS val BETWEEN 15 AND 25, + C AS val BETWEEN 25 AND 35, + D AS val BETWEEN 35 AND 45, + E AS val >= 45 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 5 | 0 + 2 | 10 | 0 + 3 | 15 | 0 + 4 | 20 | 0 + 5 | 25 | 0 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 40 | 0 + 9 | 45 | 0 + 10 | 50 | 0 +(10 rows) + +-- Complex combinations +-- Alternation with grouping +SELECT id, val, COUNT(*) OVER w as cnt +FROM 
rpr_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B) | (C D)) + DEFINE A AS val < 20, B AS val >= 20, C AS val < 30, D AS val >= 30 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 5 | 0 + 2 | 10 | 0 + 3 | 15 | 2 + 4 | 20 | 0 + 5 | 25 | 2 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 40 | 0 + 9 | 45 | 0 + 10 | 50 | 0 +(10 rows) + +-- Alternation + sequence + grouping +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (START (UP{2,} DOWN? | FLAT+) FINISH) + DEFINE + START AS val >= 0, + UP AS val > 20, + DOWN AS val <= 30, + FLAT AS val BETWEEN 25 AND 35, + FINISH AS val > 40 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 5 | 0 + 2 | 10 | 0 + 3 | 15 | 0 + 4 | 20 | 7 + 5 | 25 | 0 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 40 | 0 + 9 | 45 | 0 + 10 | 50 | 0 +(10 rows) + +-- Nested alternation in groups +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B) (C | D)) + DEFINE A AS val < 15, B AS val BETWEEN 15 AND 25, C AS val BETWEEN 25 AND 35, D AS val > 35 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 5 | 0 + 2 | 10 | 0 + 3 | 15 | 0 + 4 | 20 | 2 + 5 | 25 | 0 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 40 | 0 + 9 | 45 | 0 + 10 | 50 | 0 +(10 rows) + +DROP TABLE rpr_pattern; +-- ============================================================ +-- Quantifiers Tests +-- ============================================================ +CREATE TABLE rpr_quant (id INT, val INT); +INSERT INTO rpr_quant VALUES + (1, 10), (2, 20), (3, 30), (4, 40), (5, 50), + (6, 60), (7, 70), (8, 80), (9, 90), (10, 100); +-- Basic greedy quantifiers +-- * (zero or more) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A*) + DEFINE A AS val > 0 +) +ORDER BY id; 
+ id | val | cnt +----+-----+----- + 1 | 10 | 10 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- + (one or more) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 50 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 5 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- ? (zero or one) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A?) + DEFINE A AS val = 50 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 1 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Edge case quantifiers +-- {0} is not allowed (min must be >= 1) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{0} B) + DEFINE A AS val > 1000, B AS val > 0 +) +ORDER BY id; +ERROR: quantifier bound must be between 1 and 2147483646 +LINE 6: PATTERN (A{0} B) + ^ +-- Expected: ERROR: quantifier bound must be between 1 and 2147483646 +-- {0,0} is not allowed (max must be >= 1) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{0,0} B) + DEFINE A AS val > 1000, B AS val > 0 +) +ORDER BY id; +ERROR: quantifier bounds must be between 0 and 2147483646 with max >= 1 +LINE 6: PATTERN (A{0,0} B) + ^ +-- Expected: ERROR: quantifier bounds must be between 0 and 2147483646 with max >= 1 +-- {0,1} (equivalent to ?) 
+SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{0,1}) + DEFINE A AS val = 50 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 1 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Exact quantifiers {n} +-- {3} (representative exact quantifier) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{3}) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 3 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 3 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Range quantifiers {n,} +-- {2,} (representative n or more) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2,}) + DEFINE A AS val > 40 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 6 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Upper bound quantifiers {,m} +-- {,3} (representative up to m) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{,3}) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 3 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 3 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 1 +(10 rows) + +-- Range quantifiers {n,m} +-- {3,7} (representative range) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_quant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{3,7}) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 7 + 2 
| 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 3 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +DROP TABLE rpr_quant; +-- Reluctant quantifiers +CREATE TABLE rpr_reluctant (id INT, val INT); +INSERT INTO rpr_reluctant VALUES (1, 10), (2, 20), (3, 30); +-- *? (zero or more, reluctant) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A*?) + DEFINE A AS val > 0 +); + count +------- + 0 + 0 + 0 +(3 rows) + +-- Reluctant quantifier: prefer shortest match +-- +? (one or more, reluctant) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+?) + DEFINE A AS val > 0 +); + count +------- + 1 + 1 + 1 +(3 rows) + +-- Reluctant quantifier: prefer shortest match +-- ?? (zero or one, reluctant) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A??) + DEFINE A AS val > 0 +); + count +------- + 0 + 0 + 0 +(3 rows) + +-- Reluctant quantifier: prefer shortest match +-- {n,}? (n or more, reluctant) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2,}?) + DEFINE A AS val > 0 +); + count +------- + 2 + 0 + 0 +(3 rows) + +-- Reluctant quantifier: prefer shortest match +-- {n,m}? (n to m, reluctant) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{1,3}?) + DEFINE A AS val > 0 +); + count +------- + 1 + 1 + 1 +(3 rows) + +-- Reluctant quantifier: prefer shortest match +-- {n}? (exactly n, reluctant) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2}?) 
+ DEFINE A AS val > 0 +); + count +------- + 2 + 0 + 0 +(3 rows) + +-- Reluctant quantifier: prefer shortest match +-- {,m}? (up to m, reluctant) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{,3}?) + DEFINE A AS val > 0 +); + count +------- + 0 + 0 + 0 +(3 rows) + +-- Reluctant quantifier: prefer shortest match +-- Invalid reluctant patterns (wrong token after quantifier) +-- {2}+ (should be {2}? not {2}+) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2}+) + DEFINE A AS val > 0 +); +ERROR: syntax error at or near "+" +LINE 6: PATTERN (A{2}+) + ^ +-- Expected: ERROR: syntax error at or near "+" +-- {2,}* (should be {2,}? not {2,}*) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2,}*) + DEFINE A AS val > 0 +); +ERROR: syntax error at or near "*" +LINE 6: PATTERN (A{2,}*) + ^ +-- Expected: ERROR: syntax error at or near "*" +-- {,3}* (should be {,3}? not {,3}*) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{,3}*) + DEFINE A AS val > 0 +); +ERROR: syntax error at or near "*" +LINE 6: PATTERN (A{,3}*) + ^ +-- Expected: ERROR: syntax error at or near "*" +-- {1,3}+ (should be {1,3}? not {1,3}+) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{1,3}+) + DEFINE A AS val > 0 +); +ERROR: syntax error at or near "+" +LINE 6: PATTERN (A{1,3}+) + ^ +-- Expected: ERROR: syntax error at or near "+" +-- Boundary errors in reluctant quantifiers +-- {-1}? (negative bound) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{-1}?) 
+ DEFINE A AS val > 0 +); +ERROR: syntax error at or near "-" +LINE 6: PATTERN (A{-1}?) + ^ +-- Expected: ERROR: syntax error at or near "-" +-- {2147483647}? (INT_MAX) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2147483647}?) + DEFINE A AS val > 0 +); +ERROR: quantifier bound must be between 1 and 2147483646 +LINE 6: PATTERN (A{2147483647}?) + ^ +-- Expected: ERROR: quantifier bound must be between 1 and 2147483646 +-- {-1,}? (negative lower bound) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{-1,}?) + DEFINE A AS val > 0 +); +ERROR: syntax error at or near "-" +LINE 6: PATTERN (A{-1,}?) + ^ +-- Expected: ERROR: syntax error at or near "-" +-- {2147483647,}? (INT_MAX lower bound) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2147483647,}?) + DEFINE A AS val > 0 +); +ERROR: quantifier bound must be between 0 and 2147483646 +LINE 6: PATTERN (A{2147483647,}?) + ^ +-- Expected: ERROR: quantifier bound must be between 0 and 2147483646 +-- {,0}? (zero upper bound) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{,0}?) + DEFINE A AS val > 0 +); +ERROR: quantifier bound must be between 1 and 2147483646 +LINE 6: PATTERN (A{,0}?) + ^ +-- Expected: ERROR: quantifier bound must be between 1 and 2147483646 +-- {,2147483647}? (INT_MAX upper bound) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{,2147483647}?) + DEFINE A AS val > 0 +); +ERROR: quantifier bound must be between 1 and 2147483646 +LINE 6: PATTERN (A{,2147483647}?) + ^ +-- Expected: ERROR: quantifier bound must be between 1 and 2147483646 +-- {-1,3}? 
(negative lower in range) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{-1,3}?) + DEFINE A AS val > 0 +); +ERROR: syntax error at or near "-" +LINE 6: PATTERN (A{-1,3}?) + ^ +-- Expected: ERROR: syntax error at or near "-" +-- {1,2147483647}? (INT_MAX upper in range) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{1,2147483647}?) + DEFINE A AS val > 0 +); +ERROR: quantifier bounds must be between 0 and 2147483646 with max >= 1 +LINE 6: PATTERN (A{1,2147483647}?) + ^ +-- Expected: ERROR: quantifier bounds must be between 0 and 2147483646 with max >= 1 +-- {5,3}? (min > max) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{5,3}?) + DEFINE A AS val > 0 +); +ERROR: quantifier minimum bound must not exceed maximum +LINE 6: PATTERN (A{5,3}?) + ^ +-- Expected: ERROR: quantifier minimum bound must not exceed maximum +-- Token-separated reluctant quantifiers (space between quantifier and ?) +-- These may be tokenized differently by the lexer +-- * ? (token separated) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A* ?) + DEFINE A AS val > 0 +); + count +------- + 0 + 0 + 0 +(3 rows) + +-- Reluctant quantifier: prefer shortest match +-- + ? (token separated) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ ?) + DEFINE A AS val > 0 +); + count +------- + 1 + 1 + 1 +(3 rows) + +-- Reluctant quantifier: prefer shortest match +-- {2,} ? (token separated) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2,} ?) 
+ DEFINE A AS val > 0 +); + count +------- + 2 + 0 + 0 +(3 rows) + +-- Reluctant quantifier: prefer shortest match +-- Invalid token combinations +-- * + (invalid combination) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A* +) + DEFINE A AS val > 0 +); +ERROR: syntax error at or near "+" +LINE 6: PATTERN (A* +) + ^ +-- Expected: ERROR: syntax error at or near "+" +-- + * (invalid combination) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ *) + DEFINE A AS val > 0 +); +ERROR: syntax error at or near "*" +LINE 6: PATTERN (A+ *) + ^ +-- Expected: ERROR: syntax error at or near "*" +-- ? ? (parsed as ?? reluctant quantifier) +SELECT COUNT(*) OVER w +FROM rpr_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A? ?) + DEFINE A AS val > 0 +); + count +------- + 0 + 0 + 0 +(3 rows) + +-- Reluctant quantifier: prefer shortest match +DROP TABLE rpr_reluctant; +-- Quantifier boundary conditions +CREATE TABLE rpr_bounds (id INT); +INSERT INTO rpr_bounds VALUES (1), (2); +-- min > max +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{5,3}) + DEFINE A AS id > 0 +); +ERROR: quantifier minimum bound must not exceed maximum +LINE 6: PATTERN (A{5,3}) + ^ +-- Expected: ERROR: quantifier minimum bound must not exceed maximum +-- Large bounds +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{1000,2000}) + DEFINE A AS id > 0 +); + count +------- + 0 + 0 +(2 rows) + +-- Very large bound +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{100000}) + DEFINE A AS id > 0 +); + count +------- + 0 + 0 +(2 rows) + +-- 
INT_MAX - 1 = 2147483646 (at limit) +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2147483646}) + DEFINE A AS id > 0 +); + count +------- + 0 + 0 +(2 rows) + +-- INT_MAX = 2147483647 (over limit) +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2147483647}) + DEFINE A AS id > 0 +); +ERROR: quantifier bound must be between 1 and 2147483646 +LINE 6: PATTERN (A{2147483647}) + ^ +-- Expected: ERROR: quantifier bound must be between 1 and 2147483646 +-- {n,} boundary errors +-- Negative lower bound in {n,} +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{-1,}) + DEFINE A AS id > 0 +); +ERROR: syntax error at or near "-" +LINE 6: PATTERN (A{-1,}) + ^ +-- Expected: ERROR: syntax error at or near "-" +-- INT_MAX in {n,} +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2147483647,}) + DEFINE A AS id > 0 +); +ERROR: quantifier bound must be between 0 and 2147483646 +LINE 6: PATTERN (A{2147483647,}) + ^ +-- Expected: ERROR: quantifier bound must be between 0 and 2147483646 +-- {,m} boundary errors +-- Zero upper bound in {,m} +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{,0}) + DEFINE A AS id > 0 +); +ERROR: quantifier bound must be between 1 and 2147483646 +LINE 6: PATTERN (A{,0}) + ^ +-- Expected: ERROR: quantifier bound must be between 1 and 2147483646 +-- INT_MAX in {,m} +SELECT COUNT(*) OVER w +FROM rpr_bounds +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{,2147483647}) + DEFINE A AS id > 0 +); +ERROR: quantifier bound must be between 1 and 2147483646 +LINE 6: PATTERN (A{,2147483647}) + ^ +-- Expected: 
ERROR: quantifier bound must be between 1 and 2147483646 +DROP TABLE rpr_bounds; +-- ============================================================ +-- Navigation Functions Tests (PREV / NEXT / FIRST / LAST) +-- ============================================================ +CREATE TABLE rpr_nav (id INT, val INT); +INSERT INTO rpr_nav VALUES + (1, 10), (2, 20), (3, 15), (4, 25), (5, 30); +-- PREV function - reference previous row in pattern +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_nav +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE + A AS val > 0, + B AS val > PREV(val) +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 15 | 3 + 4 | 25 | 0 + 5 | 30 | 0 +(5 rows) + +-- NEXT function - reference next row in pattern +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_nav +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ B) + DEFINE + A AS val < NEXT(val), + B AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 15 | 3 + 4 | 25 | 0 + 5 | 30 | 0 +(5 rows) + +-- Combined PREV and NEXT +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_nav +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B C) + DEFINE + A AS val > 0, + B AS val > PREV(val) AND val < NEXT(val), + C AS val > PREV(val) +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 15 | 3 + 4 | 25 | 0 + 5 | 30 | 0 +(5 rows) + +-- PREV function cannot be used other than in DEFINE +SELECT PREV(id), id, val, COUNT(*) OVER w as cnt +FROM rpr_nav +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE + A AS val > 0, + B AS val > PREV(val) +) +ORDER BY id; +ERROR: cannot use prev outside a DEFINE clause +LINE 1: SELECT PREV(id), id, val, COUNT(*) OVER w as cnt + ^ +-- Expected: ERROR: cannot use prev outside a DEFINE clause 
+-- NEXT function cannot be used other than in DEFINE +SELECT NEXT(id), id, val, COUNT(*) OVER w as cnt +FROM rpr_nav +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE + A AS val > 0, + B AS val > PREV(val) +) +ORDER BY id; +ERROR: cannot use next outside a DEFINE clause +LINE 1: SELECT NEXT(id), id, val, COUNT(*) OVER w as cnt + ^ +-- Expected: ERROR: cannot use next outside a DEFINE clause +-- FIRST function - reference match_start row +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_nav +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE + A AS val > 0, + B AS val > FIRST(val) +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 5 + 2 | 20 | 0 + 3 | 15 | 0 + 4 | 25 | 0 + 5 | 30 | 0 +(5 rows) + +-- LAST function without offset - equivalent to current row's value +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_nav +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE + A AS val > 0, + B AS LAST(val) > PREV(val) +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 15 | 3 + 4 | 25 | 0 + 5 | 30 | 0 +(5 rows) + +-- FIRST and LAST combined +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_nav +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE + A AS val > 0, + B AS val > FIRST(val) AND LAST(val) > PREV(val) +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 15 | 3 + 4 | 25 | 0 + 5 | 30 | 0 +(5 rows) + +-- FIRST function cannot be used other than in DEFINE +SELECT FIRST(id), id, val FROM rpr_nav; +ERROR: cannot use first outside a DEFINE clause +LINE 1: SELECT FIRST(id), id, val FROM rpr_nav; + ^ +-- Expected: ERROR: cannot use first outside a DEFINE clause +-- LAST function cannot be used other than in DEFINE +SELECT LAST(id), id, val FROM rpr_nav; +ERROR: cannot use last outside 
a DEFINE clause +LINE 1: SELECT LAST(id), id, val FROM rpr_nav; + ^ +-- Expected: ERROR: cannot use last outside a DEFINE clause +DROP TABLE rpr_nav; +-- ============================================================ +-- SKIP TO / INITIAL Tests +-- ============================================================ +CREATE TABLE rpr_skip (id INT, val INT); +INSERT INTO rpr_skip VALUES + (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), + (6, 6), (7, 7), (8, 8); +-- SKIP TO NEXT ROW +-- Matches may overlap: the next attempt starts at the row after the match start +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_skip +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C) + DEFINE A AS val > 0, B AS val > 2, C AS val > 4 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 1 | 0 + 2 | 2 | 0 + 3 | 3 | 3 + 4 | 4 | 3 + 5 | 5 | 3 + 6 | 6 | 3 + 7 | 7 | 0 + 8 | 8 | 0 +(8 rows) + +-- SKIP PAST LAST ROW +-- Matches do not overlap: the next attempt starts after the last matched row +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_skip +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS val > 0, B AS val > 2, C AS val > 4 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 1 | 0 + 2 | 2 | 0 + 3 | 3 | 3 + 4 | 4 | 0 + 5 | 5 | 0 + 6 | 6 | 3 + 7 | 7 | 0 + 8 | 8 | 0 +(8 rows) + +-- Default behavior (should be SKIP PAST LAST ROW) +-- No AFTER MATCH SKIP clause (default) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_skip +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B) + DEFINE A AS val > 0, B AS val > 1 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 1 | 2 + 2 | 2 | 0 + 3 | 3 | 2 + 4 | 4 | 0 + 5 | 5 | 2 + 6 | 6 | 0 + 7 | 7 | 2 + 8 | 8 | 0 +(8 rows) + +-- Compare default with explicit PAST LAST ROW +-- Results should be identical +WITH default_skip AS ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_skip + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + 
PATTERN (A B C) + DEFINE A AS val > 0, B AS val > 2, C AS val > 4 + ) +), +explicit_skip AS ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_skip + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS val > 0, B AS val > 2, C AS val > 4 + ) +) +SELECT 'default' as type, * FROM default_skip +UNION ALL +SELECT 'explicit' as type, * FROM explicit_skip +ORDER BY type, id; + type | id | val | cnt +----------+----+-----+----- + default | 1 | 1 | 0 + default | 2 | 2 | 0 + default | 3 | 3 | 3 + default | 4 | 4 | 0 + default | 5 | 5 | 0 + default | 6 | 6 | 3 + default | 7 | 7 | 0 + default | 8 | 8 | 0 + explicit | 1 | 1 | 0 + explicit | 2 | 2 | 0 + explicit | 3 | 3 | 3 + explicit | 4 | 4 | 0 + explicit | 5 | 5 | 0 + explicit | 6 | 6 | 3 + explicit | 7 | 7 | 0 + explicit | 8 | 8 | 0 +(16 rows) + +DROP TABLE rpr_skip; +-- INITIAL clause +CREATE TABLE rpr_init (id INT, val INT); +INSERT INTO rpr_init VALUES (1, 10), (2, 20), (3, 30), (4, 40); +-- Explicit INITIAL +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_init +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 4 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 +(4 rows) + +-- Implicit INITIAL (default) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_init +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 4 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 +(4 rows) + +DROP TABLE rpr_init; +-- SEEK +CREATE TABLE rpr_seek (id INT, val INT); +INSERT INTO rpr_seek VALUES (1, 10); +-- SEEK keyword +SELECT COUNT(*) OVER w +FROM rpr_seek +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + SEEK + PATTERN (A+) + DEFINE A AS val > 0 +); +ERROR: SEEK is not 
supported +LINE 6: SEEK + ^ +HINT: Use INITIAL instead. +-- Expected: ERROR: SEEK is not supported +-- HINT: Use INITIAL instead. +DROP TABLE rpr_seek; +-- ============================================================ +-- Serialization/Deserialization Tests +-- ============================================================ +-- View creation and deparsing +CREATE TABLE rpr_serial (id INT, val INT); +INSERT INTO rpr_serial VALUES + (1, 10), (2, 20), (3, 15), (4, 25), (5, 30); +-- Simple pattern +CREATE VIEW rpr_serial_v1 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +); +-- Verify view works (tests deserialization) +SELECT * FROM rpr_serial_v1 ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 5 + 2 | 20 | 0 + 3 | 15 | 0 + 4 | 25 | 0 + 5 | 30 | 0 +(5 rows) + +-- Verify deparsing +SELECT pg_get_viewdef('rpr_serial_v1'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS cnt + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a+) + + DEFINE + + a AS (val > 0) ); +(1 row) + +-- Complex pattern with alternation +CREATE VIEW rpr_serial_v2 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ | B*) + DEFINE A AS val > 20, B AS val <= 20 +); +SELECT * FROM rpr_serial_v2 ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 15 | 0 + 4 | 25 | 2 + 5 | 30 | 0 +(5 rows) + +SELECT pg_get_viewdef('rpr_serial_v2'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS cnt + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS 
BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a+ | b*) + + DEFINE + + a AS (val > 20), + + b AS (val <= 20) ); +(1 row) + +-- Pattern with grouping and quantifiers +CREATE VIEW rpr_serial_v3 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B){2,5} | C*) + DEFINE + A AS val > 10, + B AS val > 20, + C AS val <= 10 +); +SELECT * FROM rpr_serial_v3 ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 1 + 2 | 20 | 0 + 3 | 15 | 0 + 4 | 25 | 0 + 5 | 30 | 0 +(5 rows) + +SELECT pg_get_viewdef('rpr_serial_v3'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS cnt + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a b){2,5} | c*) + + DEFINE + + a AS (val > 10), + + b AS (val > 20), + + c AS (val <= 10) ); +(1 row) + +-- All features combined +CREATE VIEW rpr_serial_v4 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + INITIAL + PATTERN (START (MID{1,3} | ALT+) FINISH) + DEFINE + START AS val > 5, + MID AS val BETWEEN 10 AND 25, + ALT AS val > 25, + FINISH AS val > 15 +); +SELECT * FROM rpr_serial_v4 ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 5 + 2 | 20 | 4 + 3 | 15 | 3 + 4 | 25 | 0 + 5 | 30 | 0 +(5 rows) + +SELECT pg_get_viewdef('rpr_serial_v4'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS cnt + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP TO NEXT ROW + + INITIAL + + PATTERN (start (mid{1,3} 
| alt+) finish) + + DEFINE + + start AS (val > 5), + + mid AS ((val >= 10) AND (val <= 25)), + + alt AS (val > 25), + + finish AS (val > 15) ); +(1 row) + +-- Additional quantifiers for deparsing coverage +-- ? quantifier (zero or one) +CREATE VIEW rpr_serial_v5 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B?) + DEFINE A AS val > 10, B AS val > 20 +); +SELECT * FROM rpr_serial_v5 ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 1 + 3 | 15 | 2 + 4 | 25 | 0 + 5 | 30 | 1 +(5 rows) + +SELECT pg_get_viewdef('rpr_serial_v5'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS cnt + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a b?) + + DEFINE + + a AS (val > 10), + + b AS (val > 20) ); +(1 row) + +-- {n,} quantifier (n or more) +CREATE VIEW rpr_serial_v6 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2,}) + DEFINE A AS val > 15 +); +SELECT * FROM rpr_serial_v6 ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 15 | 0 + 4 | 25 | 2 + 5 | 30 | 0 +(5 rows) + +SELECT pg_get_viewdef('rpr_serial_v6'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS cnt + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a{2,}) + + DEFINE + + a AS (val > 15) ); +(1 row) + +-- {n} quantifier (exactly n) +CREATE VIEW rpr_serial_v7 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + 
ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{3}) + DEFINE A AS val > 0 +); +SELECT * FROM rpr_serial_v7 ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 15 | 0 + 4 | 25 | 0 + 5 | 30 | 0 +(5 rows) + +SELECT pg_get_viewdef('rpr_serial_v7'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS cnt + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a{3}) + + DEFINE + + a AS (val > 0) ); +(1 row) + +-- Nested ALT pattern (tests deparse of complex nested structure) +CREATE VIEW rpr_serial_v8 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_serial +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A+ B) | C) D | A B C) + DEFINE A AS val <= 15, B AS val <= 25, C AS val <= 30, D AS val > 30 +); +SELECT * FROM rpr_serial_v8 ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 15 | 0 + 4 | 25 | 0 + 5 | 30 | 0 +(5 rows) + +SELECT pg_get_viewdef('rpr_serial_v8'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS cnt + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (((a+ b) | c) d | a b c) + + DEFINE + + a AS (val <= 15), + + b AS (val <= 25), + + c AS (val <= 30), + + d AS (val > 30) ); +(1 row) + +-- Navigation function serialization: PREV with offset +CREATE VIEW rpr_serial_nav1 AS +SELECT id, val, count(*) OVER w +FROM rpr_serial +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE A AS TRUE, B AS val > PREV(val, 2)); +SELECT pg_get_viewdef('rpr_serial_nav1'::regclass); + pg_get_viewdef 
+------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS count + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a b+) + + DEFINE + + a AS true, + + b AS (val > PREV(val, (2)::bigint)) ); +(1 row) + +-- Navigation function serialization: FIRST and LAST +CREATE VIEW rpr_serial_nav2 AS +SELECT id, val, count(*) OVER w +FROM rpr_serial +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE A AS TRUE, B AS FIRST(val) < LAST(val, 1)); +SELECT pg_get_viewdef('rpr_serial_nav2'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS count + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a b+) + + DEFINE + + a AS true, + + b AS (FIRST(val) < LAST(val, (1)::bigint)) ); +(1 row) + +-- Navigation function serialization: compound PREV(FIRST()) +CREATE VIEW rpr_serial_nav3 AS +SELECT id, val, count(*) OVER w +FROM rpr_serial +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE A AS TRUE, B AS PREV(FIRST(val, 1), 2) > 0); +SELECT pg_get_viewdef('rpr_serial_nav3'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS count + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a b+) + + DEFINE + + a AS true, + + b AS (PREV(FIRST(val, (1)::bigint), (2)::bigint) > 0) ); +(1 row) + +-- Navigation function serialization: compound NEXT(LAST()) +CREATE VIEW rpr_serial_nav4 AS +SELECT id, val, count(*) OVER w +FROM 
rpr_serial +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE A AS TRUE, B AS NEXT(LAST(val), 2) IS NOT NULL); +SELECT pg_get_viewdef('rpr_serial_nav4'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS count + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a b+) + + DEFINE + + a AS true, + + b AS (NEXT(LAST(val), (2)::bigint) IS NOT NULL) ); +(1 row) + +-- Navigation function serialization: compound PREV(LAST()) +CREATE VIEW rpr_serial_nav5 AS +SELECT id, val, count(*) OVER w +FROM rpr_serial +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE A AS TRUE, B AS PREV(LAST(val, 1), 2) > 0); +SELECT pg_get_viewdef('rpr_serial_nav5'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS count + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a b+) + + DEFINE + + a AS true, + + b AS (PREV(LAST(val, (1)::bigint), (2)::bigint) > 0) ); +(1 row) + +-- Navigation function serialization: compound NEXT(FIRST()) +CREATE VIEW rpr_serial_nav6 AS +SELECT id, val, count(*) OVER w +FROM rpr_serial +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE A AS TRUE, B AS NEXT(FIRST(val), 3) > 0); +SELECT pg_get_viewdef('rpr_serial_nav6'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS count + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + 
INITIAL + + PATTERN (a b+) + + DEFINE + + a AS true, + + b AS (NEXT(FIRST(val), (3)::bigint) > 0) ); +(1 row) + +-- Reluctant {1}? quantifier deparse through ruleutils +CREATE VIEW rpr_quant_reluctant_v AS +SELECT id, val, count(*) OVER w +FROM rpr_serial +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{1}? B) + DEFINE A AS val > 0, B AS val > 0); +SELECT pg_get_viewdef('rpr_quant_reluctant_v'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS count + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a{1}? b) + + DEFINE + + a AS (val > 0), + + b AS (val > 0) ); +(1 row) + +-- Quoted identifier round-trip: mixed case and reserved words need quoting +CREATE VIEW rpr_serial_quoted AS +SELECT id, val, count(*) OVER w +FROM rpr_serial +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ("Start" "Up"+) + DEFINE "Start" AS TRUE, "Up" AS val > PREV(val)); +SELECT pg_get_viewdef('rpr_serial_quoted'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS count + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ("Start" "Up"+) + + DEFINE + + "Start" AS true, + + "Up" AS (val > PREV(val)) ); +(1 row) + +-- Materialized view (if supported) +CREATE TABLE rpr_mview (id INT, val INT); +INSERT INTO rpr_mview VALUES (1, 10), (2, 20), (3, 30); +CREATE MATERIALIZED VIEW rpr_mview_v1 AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_mview +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +); +SELECT * FROM rpr_mview_v1 ORDER BY 
id; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 30 | 0 +(3 rows) + +SELECT pg_get_viewdef('rpr_mview_v1'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS cnt + + FROM rpr_mview + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a+) + + DEFINE + + a AS (val > 0) ); +(1 row) + +-- Refresh test +REFRESH MATERIALIZED VIEW rpr_mview_v1; +SELECT * FROM rpr_mview_v1 ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 30 | 0 +(3 rows) + +-- CREATE TABLE AS SELECT with RPR +CREATE TABLE rpr_ctas (id INT, val INT); +INSERT INTO rpr_ctas VALUES (1, 10), (2, 20), (3, 15), (4, 25); +CREATE TABLE rpr_ctas_result AS +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_ctas +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS val > PREV(val) +); +SELECT * FROM rpr_ctas_result ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 15 | 2 + 4 | 25 | 0 +(4 rows) + +-- INSERT INTO ... 
SELECT with RPR +CREATE TABLE rpr_insert_target (id INT, val INT, cnt BIGINT); +INSERT INTO rpr_insert_target +SELECT id, val, count(*) OVER w +FROM rpr_ctas +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS TRUE, B AS val > PREV(val) +); +SELECT * FROM rpr_insert_target ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 15 | 2 + 4 | 25 | 0 +(4 rows) + +DROP TABLE rpr_ctas_result; +DROP TABLE rpr_insert_target; +DROP TABLE rpr_ctas; +-- Prepared statements (tests outfuncs.c / readfuncs.c) +CREATE TABLE rpr_prep (id INT, val INT); +INSERT INTO rpr_prep VALUES (1, 10), (2, 20), (3, 30); +-- Simple prepared statement +PREPARE rpr_prep_simple AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_prep +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; +EXECUTE rpr_prep_simple; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 30 | 0 +(3 rows) + +EXECUTE rpr_prep_simple; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 30 | 0 +(3 rows) + +DEALLOCATE rpr_prep_simple; +-- Prepared statement with parameters +PREPARE rpr_prep_param(int) AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_prep +WHERE id <= $1 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 10 +) +ORDER BY id; +EXECUTE rpr_prep_param(2); + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 1 +(2 rows) + +EXECUTE rpr_prep_param(3); + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 2 + 3 | 30 | 0 +(3 rows) + +DEALLOCATE rpr_prep_param; +-- Complex prepared statement +PREPARE rpr_prep_complex AS +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_prep +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A B){1,2} | C+) + DEFINE + 
A AS val > 5, + B AS val > 15, + C AS val <= 15 +) +ORDER BY id; +EXECUTE rpr_prep_complex; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 2 + 3 | 30 | 0 +(3 rows) + +EXECUTE rpr_prep_complex; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 2 + 3 | 30 | 0 +(3 rows) + +DEALLOCATE rpr_prep_complex; +DROP TABLE rpr_prep; +-- CTE and Subquery (tests copyfuncs.c) +CREATE TABLE rpr_copy (id INT, val INT); +INSERT INTO rpr_copy VALUES (1, 10), (2, 20), (3, 30), (4, 40); +-- Simple CTE +WITH rpr_cte AS ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_copy + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) +SELECT * FROM rpr_cte ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 4 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 +(4 rows) + +-- CTE with multiple references (forces node copy) +WITH rpr_cte AS ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_copy + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 15 + ) +) +SELECT c1.id, c1.cnt as cnt1, c2.cnt as cnt2 +FROM rpr_cte c1 +JOIN rpr_cte c2 ON c1.id = c2.id +ORDER BY c1.id; + id | cnt1 | cnt2 +----+------+------ + 1 | 0 | 0 + 2 | 3 | 3 + 3 | 0 | 0 + 4 | 0 | 0 +(4 rows) + +-- Subquery in FROM clause +SELECT * +FROM ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_copy + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B?) 
+ DEFINE A AS val > 10, B AS val > 20 + ) +) sub +WHERE cnt > 0 +ORDER BY id; + id | val | cnt +----+-----+----- + 2 | 20 | 2 + 4 | 40 | 1 +(2 rows) + +-- Nested subqueries +SELECT * +FROM ( + SELECT * + FROM ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_copy + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val >= 10 + ) + ) inner_sub + WHERE cnt > 0 +) outer_sub +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 4 +(1 row) + +DROP TABLE rpr_copy; +-- DISTINCT and set operations (tests equalfuncs.c) +CREATE TABLE rpr_equal (id INT, val INT); +INSERT INTO rpr_equal VALUES (1, 10), (2, 20), (3, 10), (4, 20); +-- DISTINCT with RPR +SELECT DISTINCT cnt +FROM ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_equal + WINDOW w AS ( + ORDER BY val + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE A AS val > 0 + ) +) sub +ORDER BY cnt; + cnt +----- + 1 + 2 + 3 + 4 +(4 rows) + +-- UNION with RPR in both sides +SELECT id, val, cnt FROM ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_equal + WHERE val = 10 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) sub1 +UNION +SELECT id, val, cnt FROM ( + SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_equal + WHERE val = 20 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) sub2 +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 2 + 3 | 10 | 0 + 4 | 20 | 0 +(4 rows) + +-- UNION ALL +SELECT id, cnt FROM ( + SELECT id, COUNT(*) OVER w as cnt + FROM rpr_equal + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 10 + ) +) sub +UNION ALL +SELECT id, cnt FROM ( + SELECT id, COUNT(*) OVER w as cnt + FROM rpr_equal + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT 
ROW AND UNBOUNDED FOLLOWING + PATTERN (B+) + DEFINE B AS val <= 10 + ) +) sub +ORDER BY id, cnt; + id | cnt +----+----- + 1 | 0 + 1 | 1 + 2 | 0 + 2 | 1 + 3 | 0 + 3 | 1 + 4 | 0 + 4 | 1 +(8 rows) + +-- INTERSECT +SELECT id, cnt FROM ( + SELECT id, COUNT(*) OVER w as cnt + FROM rpr_equal + WHERE id <= 3 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) sub1 +INTERSECT +SELECT id, cnt FROM ( + SELECT id, COUNT(*) OVER w as cnt + FROM rpr_equal + WHERE id >= 2 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) sub2 +ORDER BY id; + id | cnt +----+----- + 3 | 0 +(1 row) + +DROP TABLE rpr_equal; +-- View with multiple window definitions +CREATE TABLE rpr_multiwin (id INT, val INT); +INSERT INTO rpr_multiwin VALUES (1, 10), (2, 20), (3, 30); +CREATE VIEW rpr_multiwin_v AS +SELECT + id, + val, + COUNT(*) OVER w1 as cnt1, + COUNT(*) OVER w2 as cnt2 +FROM rpr_multiwin +WINDOW + w1 AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 15 + ), + w2 AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (B*) + DEFINE B AS val <= 15 + ); +SELECT * FROM rpr_multiwin_v ORDER BY id; + id | val | cnt1 | cnt2 +----+-----+------+------ + 1 | 10 | 0 | 1 + 2 | 20 | 2 | 0 + 3 | 30 | 0 | 0 +(3 rows) + +SELECT pg_get_viewdef('rpr_multiwin_v'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------------------- + SELECT id, + + val, + + count(*) OVER w1 AS cnt1, + + count(*) OVER w2 AS cnt2 + + FROM rpr_multiwin + + WINDOW w1 AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a+) + + DEFINE + + a AS (val > 15) ), w2 AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (b*) + + 
DEFINE + + b AS (val <= 15) ); +(1 row) + +-- {n} quantifier display in view +CREATE VIEW rpr_quant_n_v AS +SELECT id, val, count(*) OVER w +FROM rpr_serial +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{3}) + DEFINE A AS val > 0); +SELECT pg_get_viewdef('rpr_quant_n_v'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS count + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a{3}) + + DEFINE + + a AS (val > 0) ); +(1 row) + +-- {n,} quantifier display in view +CREATE VIEW rpr_quant_n_plus_v AS +SELECT id, val, count(*) OVER w +FROM rpr_serial +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,}) + DEFINE A AS val > 0); +SELECT pg_get_viewdef('rpr_quant_n_plus_v'::regclass); + pg_get_viewdef +------------------------------------------------------------------------------ + SELECT id, + + val, + + count(*) OVER w AS count + + FROM rpr_serial + + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a{2,}) + + DEFINE + + a AS (val > 0) ); +(1 row) + +-- ============================================================ +-- Error Cases Tests +-- ============================================================ +DROP TABLE IF EXISTS rpr_err; +CREATE TABLE rpr_err (id INT, val INT); +INSERT INTO rpr_err VALUES (1, 10), (2, 20); +-- Syntax errors +-- Invalid quantifier syntax +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+!) + DEFINE A AS val > 0 +); +ERROR: unsupported quantifier "+!" +LINE 6: PATTERN (A+!) + ^ +HINT: Valid quantifiers are: *, +, ?, *?, +?, ??, {n}, {n,}, {,m}, {n,m} and their reluctant versions. 
+-- Expected: Syntax error +-- Unmatched parentheses +SET client_min_messages = NOTICE; +DO $$ +BEGIN + EXECUTE 'SELECT COUNT(*) OVER w FROM rpr_err WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A B) DEFINE A AS val > 0, B AS val > 10)'; + RAISE NOTICE 'Unmatched parentheses: UNEXPECTED SUCCESS'; +EXCEPTION + WHEN syntax_error THEN + RAISE NOTICE 'Unmatched parentheses: EXPECTED ERROR - %', SQLERRM; + WHEN OTHERS THEN + RAISE NOTICE 'Unmatched parentheses: UNEXPECTED ERROR - %', SQLERRM; +END $$; +NOTICE: Unmatched parentheses: EXPECTED ERROR - syntax error at or near "AS" +SET client_min_messages = WARNING; +-- Empty DEFINE +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE +); +ERROR: syntax error at or near ")" +LINE 8: ); + ^ +-- Expected: Syntax error +-- Empty PATTERN +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN () + DEFINE A AS val > 0 +); +ERROR: syntax error at or near ")" +LINE 6: PATTERN () + ^ +-- Expected: Syntax error +-- DEFINE without PATTERN (PATTERN and DEFINE must be used together) +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + DEFINE A AS val > 0 +); +ERROR: syntax error at or near "DEFINE" +LINE 6: DEFINE A AS val > 0 + ^ +-- Expected: Syntax error +-- Qualified column references (NOT SUPPORTED) +-- Pattern variable qualified name: not supported (valid per SQL standard 4.16, not yet implemented) +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS A.val > 0 +); +ERROR: pattern variable qualified column reference "a.val" is not supported in DEFINE clause +LINE 7: DEFINE A AS A.val > 0 + ^ +-- Expected: ERROR: pattern variable qualified column reference "a.val" is not 
supported +-- PATTERN-only variable qualified name: not supported even without DEFINE entry +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ B+) + DEFINE A AS B.val > 0 +); +ERROR: pattern variable qualified column reference "b.val" is not supported in DEFINE clause +LINE 7: DEFINE A AS B.val > 0 + ^ +-- Expected: ERROR: pattern variable qualified column reference "b.val" is not supported +-- DEFINE-only variable qualified name: still a pattern variable, not a range variable +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0, B AS B.val > 0 +); +ERROR: DEFINE variable "b" is not used in PATTERN +LINE 7: DEFINE A AS val > 0, B AS B.val > 0 + ^ +-- Expected: ERROR: DEFINE variable "b" is not used in PATTERN (reported instead of the qualified column reference error) +-- FROM-clause range variable qualified name: not allowed (prohibited by SQL standard 6.5) +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS rpr_err.val > 0 +); +ERROR: range variable qualified column reference "rpr_err.val" is not allowed in DEFINE clause +LINE 7: DEFINE A AS rpr_err.val > 0 + ^ +-- Expected: ERROR: range variable qualified column reference "rpr_err.val" is not allowed +-- Semantic errors +-- Undefined column in DEFINE +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS nonexistent_column > 0 +); +ERROR: column "nonexistent_column" does not exist +LINE 7: DEFINE A AS nonexistent_column > 0 + ^ +-- Expected: ERROR: column "nonexistent_column" does not exist +-- Type mismatch +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 'string' +); +ERROR:
invalid input syntax for type integer: "string" +LINE 7: DEFINE A AS val > 'string' + ^ +-- Expected: ERROR: invalid input syntax for type integer: "string" +-- Aggregate function in DEFINE (if not allowed) +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS COUNT(*) > 0 +); +ERROR: aggregate functions are not allowed in DEFINE +LINE 7: DEFINE A AS COUNT(*) > 0 + ^ +-- Expected: ERROR: aggregate functions are not allowed in DEFINE +-- Subquery in DEFINE (NOT SUPPORTED) +SELECT COUNT(*) OVER w +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > (SELECT max(val) FROM rpr_err) +); +ERROR: cannot use subquery in DEFINE expression +LINE 7: DEFINE A AS val > (SELECT max(val) FROM rpr_err) + ^ +-- Expected: ERROR: cannot use subquery in DEFINE expression +-- Edge cases +-- DEFINE variable not used in PATTERN (rejected with an error; extra variables are not ignored) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_err +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0, B AS val > 5, C AS val > 10 +) +ORDER BY id; +ERROR: DEFINE variable "b" is not used in PATTERN +LINE 7: DEFINE A AS val > 0, B AS val > 5, C AS val > 10 + ^ +DROP TABLE rpr_err; +-- NULL handling +CREATE TABLE rpr_null (id INT, val INT); +INSERT INTO rpr_null VALUES (1, 10), (2, NULL), (3, 30); +-- NULL in DEFINE expression +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_null +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 15 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | | 0 + 3 | 30 | 1 +(3 rows) + +-- IS NULL in DEFINE +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_null +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (N+) + DEFINE N AS val IS NULL +) +ORDER BY id; +
id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | | 1 + 3 | 30 | 0 +(3 rows) + +-- IS NOT NULL in DEFINE +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_null +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (NN+) + DEFINE NN AS val IS NOT NULL +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 1 + 2 | | 0 + 3 | 30 | 1 +(3 rows) + +DROP TABLE rpr_null; +-- Compound navigation: inner nav must be direct arg (not nested in expression) +SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS PREV(v + FIRST(v)) > 0 +); +ERROR: row pattern navigation operation must be a direct argument of the outer navigation +LINE 6: DEFINE A AS PREV(v + FIRST(v)) > 0 + ^ +HINT: Only PREV(FIRST()), PREV(LAST()), NEXT(FIRST()), and NEXT(LAST()) compound forms are allowed. +-- FIRST/LAST wrapping FIRST/LAST: prohibited +SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS FIRST(FIRST(v)) > 0 +); +ERROR: FIRST and LAST cannot contain FIRST or LAST +LINE 6: DEFINE A AS FIRST(FIRST(v)) > 0 + ^ +HINT: Only PREV(FIRST()), PREV(LAST()), NEXT(FIRST()), and NEXT(LAST()) compound forms are allowed. +-- Triple nesting: prohibited (3-level deep navigation) +SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS PREV(FIRST(PREV(v))) > 0 +); +ERROR: cannot nest row pattern navigation more than two levels deep +LINE 6: DEFINE A AS PREV(FIRST(PREV(v))) > 0 + ^ +HINT: Only PREV(FIRST()), PREV(LAST()), NEXT(FIRST()), and NEXT(LAST()) compound forms are allowed. 
+-- ============================================================ +-- Window Deduplication Tests +-- ============================================================ +-- non-RPR and RPR windows with identical base frame are kept separate. +SELECT id, val, + first_value(id) OVER ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + ) AS fv_normal, + first_value(id) OVER w1 AS fv_rpr +FROM (VALUES (1, 10), (2, 20), (3, 30), (4, 40)) AS t(id, val) +WINDOW w1 AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 10 +); + id | val | fv_normal | fv_rpr +----+-----+-----------+-------- + 1 | 10 | 1 | + 2 | 20 | 2 | 2 + 3 | 30 | 3 | + 4 | 40 | 4 | +(4 rows) + +-- ============================================================ +-- Pattern Optimization Tests +-- ============================================================ +-- Tests for pattern optimization in optimizer/plan/rpr.c +-- Use EXPLAIN to verify optimized pattern (shown as "Pattern: ...") +CREATE TABLE rpr_plan (id INT, val INT); +INSERT INTO rpr_plan VALUES + (1, 10), (2, 20), (3, 30), (4, 40), (5, 50), + (6, 60), (7, 70), (8, 80), (9, 90), (10, 100); +-- Consecutive VAR merge: A A A -> a{3} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A A A) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{3} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Consecutive VAR merge: A{2} A{3} -> a{5} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2} A{3}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + 
WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{5} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Consecutive VAR merge: A+ A* -> a+ +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ A*) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Consecutive VAR merge: A A+ -> a{2,} +-- Tests line 251: child->max == RPR_QUANTITY_INF branch in mergeConsecutiveVars +-- prev: A{1,1} (finite), child: A+ (infinite) triggers line 251 evaluation +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A A+) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{2,}" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Consecutive GROUP merge with finite quantifiers: ((A B){5}) ((A B){10}) -> merged +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B){5}) ((A B){10})) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b){15} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Consecutive GROUP merge with unbounded: (A B)+ (A B)+ -> (a 
b){2,} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B)+ (A B)+) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){2,}" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Consecutive GROUP merge: (A B){2} (A B)+ -> (a b){3,} +-- Tests line 325: child->max == RPR_QUANTITY_INF branch in mergeConsecutiveGroups +-- prev: (A B){2,2} (finite), child: (A B)+ (infinite) triggers line 325 evaluation +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B){2} (A B)+) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){3,}" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- PREFIX merge: A B (A B)+ -> (a b){2,} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (A B)+) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){2,}" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- PREFIX and SUFFIX merge: A B (A B)+ A B -> (a b){3,} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (A B)+ A B) DEFINE A AS val <= 40, B AS 
val > 40); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){3,}" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Flatten nested: A ((B) (C)) -> a b c +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B) (C))) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b c + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Data execution: SEQ flatten produces correct results +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A ((B) (C))) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- ALT flatten: (A | (B | C))+ -> (a | b | c)+ +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | (B | C))+) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b | c)+ + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- ALT deduplicate: (A | B | A) -> (a | b) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS 
(ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B | A)+) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b)+ + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Data execution: ALT dedup produces correct results +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | A)+) DEFINE A AS val <= 50, B AS val > 50); + id | val | cnt +----+-----+----- + 1 | 10 | 10 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Quantifier multiply: (A{2}){3} -> a{6} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2}){3}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{6} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Quantifier multiply with child range: (A{2,3}){3} -> a{6,9} +-- outer exact, child range - optimization applies +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2,3}){3}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{6,9} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Quantifier NO multiply: 
(A{2}){2,3} stays as (a{2}){2,3} +-- outer range - gaps would occur (4,6 not 4,5,6), no optimization +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2}){2,3}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{2}){2,3} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Quantifier NO multiply: (A{2}){2,} stays as (a{2}){2,} +-- outer unbounded - gaps would occur (4,6,8,... not 4,5,6,...), no optimization +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2}){2,}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{2}'){2,}" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Quantifier multiply: (A){2,} -> a{2,} +-- child exact 1 - no gaps, optimization applies +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A){2,}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{2,}" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Quantifier multiply: (A)+ -> a+ +-- child exact 1 - no gaps, optimization applies +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A)+) DEFINE A AS val > 
0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Quantifier NO multiply: (A{2}){3,5} stays as (a{2}){3,5} +-- outer range, child exact > 1 - gaps would occur (6,8,10 not 6,7,8,9,10) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2}){3,5}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{2}){3,5} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Quantifier NO multiply: (A{2,3}){2,3} stays as (a{2,3}){2,3} +-- outer range, child range - gaps possible (e.g., (A{4,5}){2,3} misses 11) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2,3}){2,3}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{2,3}){2,3} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Nested unbounded: (A*)* -> a* +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A*)*) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a*" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan 
+(7 rows) + +-- Nested unbounded: (A+)* -> a* +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A+)*) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a*" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Nested unbounded: (A+)+ -> a+ +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A+)+) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Unwrap GROUP{1,1}: (A) -> a +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A)) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Unwrap GROUP{1,1}: (A B) -> a b +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B)) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + 
+-- Combined optimization: A A (B B)+ B B C C C -> a{2} (b{2}){2,} c{3} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A A (B B)+ B B C C C) + DEFINE A AS val <= 20, B AS val > 20 AND val <= 70, C AS val > 70); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{2} (b{2}){2,} c{3} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Consecutive GROUP merge with unbounded: (A+) (A+) -> a{2,} +-- Tests mergeConsecutiveGroups with child->max == INF +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A+) (A+)) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{2,}" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Consecutive GROUP merge finite: (A{10}){20} -> a{200} +-- Tests mergeConsecutiveGroups with both finite +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{10}){20}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{200} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Different GROUP prevents merge: (A B){2} (C D){3} +-- Tests mergeConsecutiveGroups flush previous +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED 
FOLLOWING + PATTERN ((A B){2} (C D){3}) + DEFINE A AS val <= 25, B AS val > 25 AND val <= 50, + C AS val > 50 AND val <= 75, D AS val > 75); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b){2} (c d){3} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Different children count prevents merge: (A B)+ (A B C)+ +-- Tests rprPatternChildrenEqual length check +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B)+ (A B C)+) + DEFINE A AS val <= 33, B AS val > 33 AND val <= 66, C AS val > 66); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b')+" (a b c)+ + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- PREFIX only merge: A B (A B)+ -> (a b){2,} +-- Tests mergeGroupPrefixSuffix: absorb preceding elements into GROUP min +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (A B)+) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){2,}" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- SUFFIX only merge: (A B)+ A B -> (a b){2,} +-- Tests mergeGroupPrefixSuffix: absorb following elements into GROUP min +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B)+ A B) DEFINE A AS val <= 50, B AS 
val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){2,}" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Multiple SUFFIX absorption with skipUntil: (A B)+ A B A B C +-- Tests mergeGroupPrefixSuffix: skip absorbed suffix elements +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B)+ A B A B C) + DEFINE A AS val <= 50, B AS val > 50 AND val <= 75, C AS val > 75); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){3,}" c + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- PREFIX merge with remaining prefix: A B C D (C D)+ +-- Tests mergeGroupPrefixSuffix: trimmed list reconstruction +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B C D (C D)+) + DEFINE A AS val <= 25, B AS val > 25 AND val <= 50, + C AS val > 50 AND val <= 75, D AS val > 75); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b (c d){2,} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- PREFIX merge with quantifiers: A B* (A B*)+ -> (a b*){2,} +-- Tests mergeGroupPrefixSuffix: quantifier comparison in rprPatternEqual +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B* (A B*)+) + DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN 
+------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b*){2,} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- PREFIX merge with multiple quantifiers: A+ B* C? (A+ B* C?)+ -> (a+ b* c?){2,} +-- Tests mergeGroupPrefixSuffix: complex quantifier patterns +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ B* C? (A+ B* C?)+) + DEFINE A AS val <= 30, B AS val > 30 AND val <= 60, C AS val > 60); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a+" b* c?){2,} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- SUFFIX merge with quantifiers: (A B*)+ A B* -> (a b*){2,} +-- Tests mergeGroupPrefixSuffix: suffix with quantifiers +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B*)+ A B*) + DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b*){2,} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Unwrap GROUP{1,1}: ((A | B | C)) -> (a | b | c) +-- Tests tryUnwrapGroup removing redundant outer GROUP +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B | C)) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + 
Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b | c) + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Data execution: GROUP unwrap produces correct results +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A | B | C)) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); + id | val | cnt +----+-----+----- + 1 | 10 | 1 + 2 | 20 | 1 + 3 | 30 | 1 + 4 | 40 | 1 + 5 | 50 | 1 + 6 | 60 | 1 + 7 | 70 | 1 + 8 | 80 | 1 + 9 | 90 | 1 + 10 | 100 | 1 +(10 rows) + +-- Reluctant optimization bypass: VAR merge +-- A+? A stays as a+? a (greedy A+ A merges to a{2,}) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+? A) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+? a + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Reluctant optimization bypass: GROUP merge +-- (A B)+? (A B) stays separate (greedy merges to (a b){2,}) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B)+? (A B)) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b)+? a b + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Reluctant optimization bypass: quantifier multiply (outer reluctant) +-- (A{2}){3}? stays as (a{2}){3}? 
(greedy merges to a{6}) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2}){3}?) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{2}){3}? + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Reluctant optimization bypass: quantifier multiply (inner reluctant) +-- (A{2}?){3} stays as (a{2}?){3} (greedy merges to a{6}) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2}?){3}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{2}?){3} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Reluctant optimization bypass: PREFIX merge +-- A B (A B)+? stays separate (greedy merges to (a b){2,}) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (A B)+?) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b (a b)+? + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Reluctant optimization bypass: SUFFIX merge +-- (A B)+? A B stays separate (greedy merges to (a b){2,}) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B)+? 
A B) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b)+? a b + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- GROUP unwrap with quantifier propagation: (A)?? B -> a?? b +-- Single VAR child {1,1} receives GROUP's quantifier and reluctant +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A)?? B) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a?? b + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Reluctant preserved through ALT flatten +-- (A | (B | C))+? flattens to (a | b | c)+? - inner ALT flattened, reluctant kept +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | (B | C))+?) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b | c)+? + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Reluctant optimization bypass: absorption flags +-- A+? with SKIP PAST LAST ROW - no absorption markers (greedy A+ gets a+") +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (A+?) 
DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+? + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Duplicate GROUP removal: ((A | B)+ | (A | B)+) -> (a | b)+ +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B)+ | (A | B)+) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b)+ + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Consecutive VAR merge with zero-min: A* A+ -> a+ +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A* A+) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Consecutive VAR merge (4-element): A A{2} A+ A{3} -> a{7,} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A A{2} A+ A{3}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{7,}" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- PREFIX+SUFFIX merge (5-way): A B A B (A B)+ A B A B -> (a b){5,} +EXPLAIN (COSTS 
OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B A B (A B)+ A B A B) + DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){5,}" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Unwrap single-item ALT after dedup: (A | A)+ -> a+ +-- ALT dedup reduces to single-item, then GROUP unwrap +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | A)+) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- GROUP{1,1} to SEQ with flatten: ((A B)(C D)) -> a b c d +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B)(C D))) + DEFINE A AS val <= 25, B AS val > 25 AND val <= 50, + C AS val > 50 AND val <= 75, D AS val > 75); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b c d + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Nested ALT pattern: ((A B) | C) D | A B C +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B) | C) D | A B C) + DEFINE A AS val <= 25, B AS val > 25 AND val <= 50, + C AS val > 50 AND val <= 75, D AS val > 75); + QUERY PLAN 
+------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: ((a b | c) d | a b c) + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Nested ALT with unbounded: ((A+ B) | C) D | A B C +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A+ B) | C) D | A B C) + DEFINE A AS val <= 25, B AS val > 25 AND val <= 50, + C AS val > 50 AND val <= 75, D AS val > 75); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: ((a+" b | c) d | a b c) + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- ============================================================ +-- Absorption Flag Display Tests +-- ============================================================ +-- Tests absorption marker display in EXPLAIN output +-- Markers: ' = branch element, " = judgment point +-- Files: explain.c (append_rpr_quantifier, deparse_rpr_pattern) +-- Simple VAR: A+ -> a+" (judgment point) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (A+) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- GROUP unbounded: (A B)+ -> (a' b')+" (branch + judgment) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP 
PAST LAST ROW PATTERN ((A B)+) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b')+" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- ALT both absorbable: A+ | B+ -> (a+" | b+") +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (A+ | B+) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a+" | b+") + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- ALT one absorbable: A+ | B -> (a+" | b) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (A+ | B) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a+" | b) + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Sequence with absorbable start: A+ B -> a+" b +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (A+ B) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Nav Mark Lookback: 0 + -> Sort + Sort 
Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Complex nested: ((A+ B) | C) D | A B C - deeply nested ALT +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (((A+ B) | C) D | A B C) + DEFINE A AS val <= 30, B AS val <= 60, C AS val <= 80, D AS val > 80); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: ((a+" b | c) d | a b c) + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Nested unbounded: (A+ | B)+ -> (a+" | b)+ (first iteration absorbable) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A+ | B)+) + DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a+" | b)+ + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- ALT inside unbounded GROUP: (A+ B | A B)* -> (a+" b | a b)* (first iteration absorbable) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A+ B | A B)*) + DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a+" b | a b)* + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Fixed-length group absorbable: (A{2} B{3})+ -> (a{2}' b{3}')+" +-- All children have min
== max, equivalent to unrolling to {1,1} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A{2} B{3})+) + DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{2}' b{3}')+" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Nested fixed-length group: (A (B C){2} D)+ -> absorbable +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A (B C){2} D)+) + DEFINE A AS val <= 20, B AS val <= 40, C AS val <= 60, D AS val > 60); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' (b' c'){2}' d')+" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Nested fixed-length with inner quantifier: ((A{2} B{3}){2})+ -> absorbable +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (((A{2} B{3}){2})+) + DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: ((a{2}' b{3}'){2}')+" + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Non-absorbable fixed-length: (A B{2,5})+ -> no markers (min != max) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW 
AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A B{2,5})+) + DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b{2,5})+ + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Non-absorbable fixed-length: (A B?)+ -> no markers (min != max) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A B?)+) + DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b?)+ + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Non-absorbable (unbounded not at start): A B+ -> a b+ (no markers) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (A B+) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Non-absorbable (no unbounded branch): (A | B){2,} -> (a | b){2,} (no markers) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A | B){2,}) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: 
w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b){2,} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Non-absorbable (SKIP TO NEXT ROW): A+ -> a+ (no markers) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW PATTERN (A+) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+ + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Non-absorbable (limited frame): A+ -> a+ (no markers) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND 10 FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (A+) DEFINE A AS val > 0); + QUERY PLAN +---------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND '10'::bigint FOLLOWING) + Pattern: a+ + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + +-- Reluctant {1}? quantifier deparse +-- A{1}? is a reluctant {1,1} quantifier. The deparse code must +-- output "{1}" explicitly to disambiguate from a bare "?" quantifier +-- (which would mean {0,1}). +EXPLAIN (COSTS OFF) SELECT count(*) OVER w +FROM rpr_plan +WINDOW w AS ( + ORDER BY val + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{1}? B) + DEFINE A AS val > 0, B AS val > 0 +); + QUERY PLAN +-------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY val ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{1}? 
b + Nav Mark Lookback: 0 + -> Sort + Sort Key: val + -> Seq Scan on rpr_plan +(7 rows) + +-- ============================================================ +-- Absorption Analysis Tests +-- ============================================================ +-- Tests context absorption optimization (O(n^2) -> O(n)) +-- Files: rpr.c (computeAbsorbability) +-- Simple Absorbable Pattern: A+ B +-- Pattern starts with unbounded VAR +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS val <= 50, B AS val > 50 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 6 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Absorbable GROUP Pattern: (A B)+ C +-- Pattern starts with unbounded GROUP +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ C) + DEFINE A AS val <= 30, B AS val > 30 AND val <= 60, C AS val > 60 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Non-Absorbable: Unbounded Not at Start +-- Pattern: A B+ (unbounded not at start) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE A AS val <= 50, B AS val > 50 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 6 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- ALT with Absorbable Branches +-- Pattern: (A+ | B+) C - both branches absorbable +SELECT id, val, COUNT(*) OVER 
w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A+ | B+) C) + DEFINE A AS val <= 30, B AS val > 30 AND val <= 60, C AS val > 60 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 4 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- ALT with Mixed Branches +-- Pattern: (A+ | B C)+ - only first branch absorbable +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A+ | B C)+) + DEFINE A AS val <= 30, B AS val > 30 AND val <= 60, C AS val > 60 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 2 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Non-Absorbable: ALT Inside GROUP +-- Pattern: (A | B){2,} - ALT inside unbounded GROUP +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B){2,}) + DEFINE A AS val <= 50, B AS val > 50 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 10 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Non-Absorbable: Nested Unbounded +-- Pattern: ((A B)+ C)+ - nested GROUP structure +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A B)+ C)+) + DEFINE A AS val <= 30, B AS val > 30 AND val <= 60, C AS val > 60 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 |
0 + 10 | 100 | 0 +(10 rows) + +-- Non-Absorbable: Unbounded Element Inside GROUP +-- Pattern: (A B+){2,} - unbounded inside GROUP +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B+){2,}) + DEFINE A AS val <= 50, B AS val > 50 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Runtime Conditions: SKIP TO NEXT ROW +-- Absorption disabled with SKIP TO NEXT ROW +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE A AS val <= 50, B AS val > 50 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 6 + 2 | 20 | 5 + 3 | 30 | 4 + 4 | 40 | 3 + 5 | 50 | 2 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Runtime Conditions: Limited Frame +-- Absorption disabled with limited frame end +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 5 FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS val <= 50, B AS val > 50 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 6 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- ============================================================ +-- Edge Case Tests +-- ============================================================ +-- Tests boundary conditions and complex scenarios +-- Empty Match Prevention +-- Pattern that could match empty: A* +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A*) + DEFINE A AS val > 
1000 -- Never matches +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- All Rows Match +-- Pattern where every row matches +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val >= 0 -- Always true +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 10 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Large Quantifiers +-- Pattern: A{100} (large exact quantifier) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{100}) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Pattern: A{10,20} (large range quantifier) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{10,20}) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 10 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Complex Multi-Level Nesting +-- Pattern: (((A B) | C)+ D)+ +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((((A B) | C)+ D)+) + DEFINE A AS val <= 20, B AS val > 20 AND val <= 40, + C AS val > 40 AND val <= 60, D AS val > 60 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 3 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 
80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Long Alternation Chain +-- Pattern: A | B | C | D | E (5-way ALT) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A | B | C | D | E) + DEFINE A AS val = 10, B AS val = 30, C AS val = 50, + D AS val = 70, E AS val = 90 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 1 + 2 | 20 | 0 + 3 | 30 | 1 + 4 | 40 | 0 + 5 | 50 | 1 + 6 | 60 | 0 + 7 | 70 | 1 + 8 | 80 | 0 + 9 | 90 | 1 + 10 | 100 | 0 +(10 rows) + +-- Long Sequence +-- Pattern: A B C D E F G H (8-element SEQ) +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B C D E F G H) + DEFINE A AS val >= 10, B AS val >= 20, C AS val >= 30, + D AS val >= 40, E AS val >= 50, F AS val >= 60, + G AS val >= 70, H AS val >= 80 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 8 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Interleaved Quantifiers +-- Pattern: A{2} B+ C{3,5} D* E{1,} +SELECT id, val, COUNT(*) OVER w as cnt +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A{2} B+ C{3,5} D* E{1,}) + DEFINE A AS val > 0, B AS val > 0, C AS val > 0, + D AS val > 0, E AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 10 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- ============================================================ +-- Optimization Fallback Tests +-- ============================================================ +-- Tests for optimization edge cases and fallback behavior +CREATE TABLE rpr_fallback (id INT, val INT); +INSERT INTO rpr_fallback VALUES (1, 10), (2, 20); +-- Test: min quantifier overflow 
causes optimization fallback (min == max case) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_fallback +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2000000000}){2}) + DEFINE A AS val > 0 +); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{2000000000}){2} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_fallback +(7 rows) + +-- Expected: Fallback - pattern not merged due to min overflow (4000000000 > INT32_MAX) +-- Test: max-only quantifier overflow causes optimization fallback +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_fallback +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{1,2000000000}){2}) + DEFINE A AS val > 0 +); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{1,2000000000}){2} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_fallback +(7 rows) + +-- Expected: Fallback - min OK (2*1=2), but max overflow (2*2000000000 > INT32_MAX) +-- Test: max quantifier exceeds valid range (2147483647 = INT_MAX, limit is 2147483646) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_fallback +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2000000000,2147483647}){2}) + DEFINE A AS val > 0 +); +ERROR: quantifier bounds must be between 0 and 2147483646 with max >= 1 +LINE 6: PATTERN ((A{2000000000,2147483647}){2}) + ^ +-- Expected: ERROR at parse time before optimization +-- Test: nested unbounded with large min causes overflow fallback +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_fallback +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + 
PATTERN ((A{2000000000,}){2000000000,}) + DEFINE A AS val > 0 +); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{2000000000,}"){2000000000,} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_fallback +(7 rows) + +-- Expected: Fallback - min overflow (2000000000 * 2000000000 > INT32_MAX) +-- Test: prefix mismatch causes optimization fallback +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_fallback +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (C D)+) + DEFINE A AS val > 0, B AS val > 5, C AS val > 10, D AS val > 15 +); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b (c d)+ + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_fallback +(7 rows) + +-- Expected: Fallback - prefix elements don't match GROUP content +DROP TABLE rpr_fallback; +-- ============================================================ +-- Planner Integration Tests +-- ============================================================ +-- Tests full planning pipeline and WindowAgg plan node creation +-- Files: planner.c, createplan.c +CREATE TABLE rpr_planner (id INT, category VARCHAR(10), val INT); +INSERT INTO rpr_planner VALUES + (1, 'A', 10), (2, 'A', 20), (3, 'A', 30), + (4, 'B', 40), (5, 'B', 50), (6, 'B', 60), + (7, 'C', 70), (8, 'C', 80), (9, 'C', 90); +-- Multiple Window Functions in Same Query +SELECT id, category, val, + COUNT(*) OVER w1 as cnt1, + COUNT(*) OVER w2 as cnt2 +FROM rpr_planner +WINDOW w1 AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +), +w2 AS ( + PARTITION BY category + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + 
PATTERN (B+) + DEFINE B AS val >= 40 +) +ORDER BY id; + id | category | val | cnt1 | cnt2 +----+----------+-----+------+------ + 1 | A | 10 | 9 | 0 + 2 | A | 20 | 0 | 0 + 3 | A | 30 | 0 | 0 + 4 | B | 40 | 0 | 3 + 5 | B | 50 | 0 | 0 + 6 | B | 60 | 0 | 0 + 7 | C | 70 | 0 | 3 + 8 | C | 80 | 0 | 0 + 9 | C | 90 | 0 | 0 +(9 rows) + +-- Window Function with PARTITION BY +SELECT id, category, val, + COUNT(*) OVER w as cnt +FROM rpr_planner +WINDOW w AS ( + PARTITION BY category + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY category, id; + id | category | val | cnt +----+----------+-----+----- + 1 | A | 10 | 3 + 2 | A | 20 | 0 + 3 | A | 30 | 0 + 4 | B | 40 | 3 + 5 | B | 50 | 0 + 6 | B | 60 | 0 + 7 | C | 70 | 3 + 8 | C | 80 | 0 + 9 | C | 90 | 0 +(9 rows) + +-- Window Function with Complex ORDER BY +SELECT id, category, val, + COUNT(*) OVER w as cnt +FROM rpr_planner +WINDOW w AS ( + ORDER BY category DESC, val ASC + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY category DESC, val ASC; + id | category | val | cnt +----+----------+-----+----- + 7 | C | 70 | 9 + 8 | C | 80 | 0 + 9 | C | 90 | 0 + 4 | B | 40 | 0 + 5 | B | 50 | 0 + 6 | B | 60 | 0 + 1 | A | 10 | 0 + 2 | A | 20 | 0 + 3 | A | 30 | 0 +(9 rows) + +-- Named Window Reference +SELECT id, category, val, + COUNT(*) OVER w as cnt +FROM rpr_planner +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | category | val | cnt +----+----------+-----+----- + 1 | A | 10 | 9 + 2 | A | 20 | 0 + 3 | A | 30 | 0 + 4 | B | 40 | 0 + 5 | B | 50 | 0 + 6 | B | 60 | 0 + 7 | C | 70 | 0 + 8 | C | 80 | 0 + 9 | C | 90 | 0 +(9 rows) + +-- Inline Window Definition +SELECT id, category, val, + COUNT(*) OVER ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) as cnt +FROM rpr_planner +ORDER 
BY id; + id | category | val | cnt +----+----------+-----+----- + 1 | A | 10 | 9 + 2 | A | 20 | 0 + 3 | A | 30 | 0 + 4 | B | 40 | 0 + 5 | B | 50 | 0 + 6 | B | 60 | 0 + 7 | C | 70 | 0 + 8 | C | 80 | 0 + 9 | C | 90 | 0 +(9 rows) + +-- Window with Aggregate Functions +SELECT category, + COUNT(*) OVER w as window_cnt, + COUNT(*) as agg_cnt +FROM rpr_planner +WINDOW w AS ( + PARTITION BY category + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +GROUP BY category +ORDER BY category; +ERROR: syntax error at or near "GROUP" +LINE 12: GROUP BY category + ^ +-- Expected: ERROR: syntax error at or near "GROUP" +-- (GROUP BY after WINDOW clause is not valid SQL syntax) +-- ============================================================ +-- Subquery and CTE Tests +-- Files: planner.c, prepjointree.c +-- ============================================================ +-- Tests RPR with subqueries and CTEs +-- RPR in Subquery (FROM clause) +SELECT * FROM ( + SELECT id, category, val, + COUNT(*) OVER w as cnt + FROM rpr_planner + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) sub +WHERE cnt > 5 +ORDER BY id; + id | category | val | cnt +----+----------+-----+----- + 1 | A | 10 | 9 +(1 row) + +-- RPR with Subquery in WHERE +SELECT id, category, val, + COUNT(*) OVER w as cnt +FROM rpr_planner +WHERE val > (SELECT AVG(val) FROM rpr_planner) +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 50 +) +ORDER BY id; + id | category | val | cnt +----+----------+-----+----- + 6 | B | 60 | 4 + 7 | C | 70 | 0 + 8 | C | 80 | 0 + 9 | C | 90 | 0 +(4 rows) + +-- CTE with RPR +WITH rpr_cte AS ( + SELECT id, category, val, + COUNT(*) OVER w as cnt + FROM rpr_planner + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) +SELECT * FROM rpr_cte 
WHERE cnt > 5 ORDER BY id; + id | category | val | cnt +----+----------+-----+----- + 1 | A | 10 | 9 +(1 row) + +-- Multiple CTE References +WITH rpr_cte AS ( + SELECT id, category, val, + COUNT(*) OVER w as cnt + FROM rpr_planner + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) +SELECT c1.id, c1.cnt, c2.cnt as cnt2 +FROM rpr_cte c1 +JOIN rpr_cte c2 ON c1.id = c2.id +ORDER BY c1.id; + id | cnt | cnt2 +----+-----+------ + 1 | 9 | 9 + 2 | 0 | 0 + 3 | 0 | 0 + 4 | 0 | 0 + 5 | 0 | 0 + 6 | 0 | 0 + 7 | 0 | 0 + 8 | 0 | 0 + 9 | 0 | 0 +(9 rows) + +-- Nested CTEs +WITH cte1 AS ( + SELECT id, category, val FROM rpr_planner WHERE val > 30 +), +cte2 AS ( + SELECT id, category, val, + COUNT(*) OVER w as cnt + FROM cte1 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) +) +SELECT * FROM cte2 ORDER BY id; + id | category | val | cnt +----+----------+-----+----- + 4 | B | 40 | 6 + 5 | B | 50 | 0 + 6 | B | 60 | 0 + 7 | C | 70 | 0 + 8 | C | 80 | 0 + 9 | C | 90 | 0 +(6 rows) + +-- ============================================================ +-- JOIN Tests +-- Files: prepjointree.c, setrefs.c +-- ============================================================ +-- Tests RPR with JOINs and multiple table references +CREATE TABLE rpr_join1 (id INT, val1 INT); +CREATE TABLE rpr_join2 (id INT, val2 INT); +INSERT INTO rpr_join1 VALUES (1, 10), (2, 20), (3, 30), (4, 40), (5, 50); +INSERT INTO rpr_join2 VALUES (1, 100), (2, 200), (3, 300), (4, 400), (5, 500); +-- RPR After INNER JOIN +SELECT t1.id, t1.val1, t2.val2, + COUNT(*) OVER w as cnt +FROM rpr_join1 t1 +INNER JOIN rpr_join2 t2 ON t1.id = t2.id +WINDOW w AS ( + ORDER BY t1.id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val1 + val2 > 100 +) +ORDER BY t1.id; + id | val1 | val2 | cnt +----+------+------+----- + 1 | 10 | 100 | 5 + 2 | 20 | 200 | 0 + 3 | 30 | 300 | 
0 + 4 | 40 | 400 | 0 + 5 | 50 | 500 | 0 +(5 rows) + +-- RPR After LEFT JOIN +SELECT t1.id, t1.val1, t2.val2, + COUNT(*) OVER w as cnt +FROM rpr_join1 t1 +LEFT JOIN rpr_join2 t2 ON t1.id = t2.id +WINDOW w AS ( + ORDER BY t1.id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val1 > 0 +) +ORDER BY t1.id; + id | val1 | val2 | cnt +----+------+------+----- + 1 | 10 | 100 | 5 + 2 | 20 | 200 | 0 + 3 | 30 | 300 | 0 + 4 | 40 | 400 | 0 + 5 | 50 | 500 | 0 +(5 rows) + +-- RPR with Multiple Tables in DEFINE +SELECT t1.id, t1.val1, t2.val2, + COUNT(*) OVER w as cnt +FROM rpr_join1 t1 +INNER JOIN rpr_join2 t2 ON t1.id = t2.id +WINDOW w AS ( + ORDER BY t1.id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ B) + DEFINE A AS val1 > 20, + B AS val2 > 200 +) +ORDER BY t1.id; + id | val1 | val2 | cnt +----+------+------+----- + 1 | 10 | 100 | 0 + 2 | 20 | 200 | 0 + 3 | 30 | 300 | 3 + 4 | 40 | 400 | 0 + 5 | 50 | 500 | 0 +(5 rows) + +-- RPR After Cross Join +SELECT t1.id as id1, t2.id as id2, t1.val1, t2.val2, + COUNT(*) OVER w as cnt +FROM rpr_join1 t1 +CROSS JOIN rpr_join2 t2 +WHERE t1.id <= 2 AND t2.id <= 2 +WINDOW w AS ( + ORDER BY t1.id, t2.id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val1 + val2 > 0 +) +ORDER BY t1.id, t2.id; + id1 | id2 | val1 | val2 | cnt +-----+-----+------+------+----- + 1 | 1 | 10 | 100 | 4 + 1 | 2 | 10 | 200 | 0 + 2 | 1 | 20 | 100 | 0 + 2 | 2 | 20 | 200 | 0 +(4 rows) + +-- Self-Join with RPR +SELECT id, val1, val1_next, + COUNT(*) OVER w as cnt +FROM (SELECT a.id, a.val1, b.val1 as val1_next + FROM rpr_join1 a + INNER JOIN rpr_join1 b ON a.id + 1 = b.id) sub +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (X+) + DEFINE X AS val1 < val1_next +) +ORDER BY id; + id | val1 | val1_next | cnt +----+------+-----------+----- + 1 | 10 | 20 | 4 + 2 | 20 | 30 | 0 + 3 | 30 | 40 | 0 + 4 | 40 | 50 | 0 +(4 rows) + +DROP TABLE rpr_join1, rpr_join2; 
+-- ============================================================ +-- Complex Expression Tests +-- Files: createplan.c, setrefs.c +-- ============================================================ +-- Tests complex target list expressions +CREATE TABLE rpr_target (id INT, val INT); +INSERT INTO rpr_target VALUES (1, 10), (2, 20), (3, 30), (4, 40), (5, 50); +-- Expressions in Target List +SELECT id, + val * 2 as doubled, + val + 10 as added, + COUNT(*) OVER w as cnt +FROM rpr_target +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | doubled | added | cnt +----+---------+-------+----- + 1 | 20 | 20 | 5 + 2 | 40 | 30 | 0 + 3 | 60 | 40 | 0 + 4 | 80 | 50 | 0 + 5 | 100 | 60 | 0 +(5 rows) + +-- CASE Expression in Target List +SELECT id, val, + CASE + WHEN val < 30 THEN 'low' + WHEN val < 50 THEN 'medium' + ELSE 'high' + END as category, + COUNT(*) OVER w as cnt +FROM rpr_target +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | category | cnt +----+-----+----------+----- + 1 | 10 | low | 5 + 2 | 20 | low | 0 + 3 | 30 | medium | 0 + 4 | 40 | medium | 0 + 5 | 50 | high | 0 +(5 rows) + +-- Subquery in Target List +SELECT id, val, + (SELECT MAX(val) FROM rpr_target) as max_val, + COUNT(*) OVER w as cnt +FROM rpr_target +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | max_val | cnt +----+-----+---------+----- + 1 | 10 | 50 | 5 + 2 | 20 | 50 | 0 + 3 | 30 | 50 | 0 + 4 | 40 | 50 | 0 + 5 | 50 | 50 | 0 +(5 rows) + +-- Function Calls in Target List +SELECT id, val, + COALESCE(val, 0) as coalesced, + ABS(val - 30) as distance, + COUNT(*) OVER w as cnt +FROM rpr_target +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | 
val | coalesced | distance | cnt +----+-----+-----------+----------+----- + 1 | 10 | 10 | 20 | 5 + 2 | 20 | 20 | 10 | 0 + 3 | 30 | 30 | 0 | 0 + 4 | 40 | 40 | 10 | 0 + 5 | 50 | 50 | 20 | 0 +(5 rows) + +-- Column Aliases and References +SELECT id as row_id, + val as value, + COUNT(*) OVER w as cnt +FROM rpr_target +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY row_id; + row_id | value | cnt +--------+-------+----- + 1 | 10 | 5 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 +(5 rows) + +DROP TABLE rpr_target; +-- ============================================================ +-- Set Operations Tests +-- Files: planner.c +-- ============================================================ +-- Tests RPR with UNION, INTERSECT, EXCEPT +CREATE TABLE rpr_set1 (id INT, val INT); +CREATE TABLE rpr_set2 (id INT, val INT); +INSERT INTO rpr_set1 VALUES (1, 10), (2, 20), (3, 30); +INSERT INTO rpr_set2 VALUES (2, 20), (3, 30), (4, 40); +-- UNION with RPR +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set1 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +UNION +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set2 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 + 2 | 20 | 3 + 3 | 30 | 0 + 4 | 40 | 0 +(5 rows) + +-- UNION ALL with RPR +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set1 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +UNION ALL +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set2 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +ORDER BY id, val; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 
0 + 2 | 20 | 3 + 3 | 30 | 0 + 3 | 30 | 0 + 4 | 40 | 0 +(6 rows) + +-- INTERSECT with RPR +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set1 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +INTERSECT +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set2 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +ORDER BY id; + id | val | cnt +----+-----+----- + 3 | 30 | 0 +(1 row) + +-- EXCEPT with RPR +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set1 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +EXCEPT +(SELECT id, val, COUNT(*) OVER w as cnt + FROM rpr_set2 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + )) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 3 + 2 | 20 | 0 +(2 rows) + +DROP TABLE rpr_set1, rpr_set2; +-- ============================================================ +-- Sorting and Grouping Tests +-- Files: planner.c, createplan.c +-- ============================================================ +-- Tests RPR interaction with sorting and grouping +CREATE TABLE rpr_sort (id INT, category VARCHAR(10), val INT); +INSERT INTO rpr_sort VALUES + (1, 'A', 30), (2, 'B', 20), (3, 'A', 10), + (4, 'B', 40), (5, 'A', 50), (6, 'B', 60); +-- RPR with GROUP BY (aggregate in DEFINE -> ERROR before GROUP BY interaction) +-- Expected: ERROR: aggregate functions are not allowed in DEFINE +SELECT category, + COUNT(*) as group_cnt, + MAX(val) as max_val, + COUNT(*) OVER w as window_cnt +FROM rpr_sort +GROUP BY category +WINDOW w AS ( + ORDER BY category + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS COUNT(*) > 0 +) +ORDER BY category; +ERROR: aggregate functions are not allowed in DEFINE +LINE 11: DEFINE A AS COUNT(*) > 0 + ^ +-- 
RPR with HAVING (same aggregate-in-DEFINE error) +-- Expected: ERROR: aggregate functions are not allowed in DEFINE +SELECT category, + COUNT(*) as group_cnt, + COUNT(*) OVER w as window_cnt +FROM rpr_sort +GROUP BY category +HAVING COUNT(*) > 2 +WINDOW w AS ( + ORDER BY category + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS COUNT(*) > 0 +) +ORDER BY category; +ERROR: aggregate functions are not allowed in DEFINE +LINE 11: DEFINE A AS COUNT(*) > 0 + ^ +-- RPR with DISTINCT +SELECT DISTINCT category, + COUNT(*) OVER w as cnt +FROM rpr_sort +WINDOW w AS ( + PARTITION BY category + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY category; + category | cnt +----------+----- + A | 3 + A | 0 + B | 0 + B | 3 +(4 rows) + +-- RPR with ORDER BY (different from window ORDER BY) +SELECT id, category, val, + COUNT(*) OVER w as cnt +FROM rpr_sort +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY val DESC; + id | category | val | cnt +----+----------+-----+----- + 6 | B | 60 | 0 + 5 | A | 50 | 0 + 4 | B | 40 | 0 + 1 | A | 30 | 6 + 2 | B | 20 | 0 + 3 | A | 10 | 0 +(6 rows) + +-- RPR with LIMIT and OFFSET +SELECT id, category, val, + COUNT(*) OVER w as cnt +FROM rpr_sort +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id +LIMIT 3 OFFSET 1; + id | category | val | cnt +----+----------+-----+----- + 2 | B | 20 | 0 + 3 | A | 10 | 0 + 4 | B | 40 | 0 +(3 rows) + +DROP TABLE rpr_sort; +-- SQL function inlining: $1 in DEFINE must be substituted by +-- substitute_actual_parameters_in_from via query_tree_mutator. 
+CREATE TABLE rpr_srf_t (v int); +INSERT INTO rpr_srf_t SELECT generate_series(1, 5); +CREATE FUNCTION rpr_srf_f(threshold int) +RETURNS TABLE (v int, cnt bigint) +LANGUAGE sql STABLE AS $$ + SELECT v::int, count(*) OVER w + FROM rpr_srf_t + WINDOW w AS ( + ORDER BY v + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS v > $1 + ) +$$; +SELECT v, cnt FROM rpr_srf_f(3) ORDER BY v; + v | cnt +---+----- + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 2 + 5 | 0 +(5 rows) + +DROP TABLE rpr_srf_t; +DROP FUNCTION rpr_srf_f(int); +DROP TABLE rpr_planner; +-- ============================================================ +-- Stress Tests +-- ============================================================ +-- Edge cases and stress scenarios +CREATE TABLE rpr_stress (id INT, val INT); +INSERT INTO rpr_stress SELECT i, i * 10 FROM generate_series(1, 20) i; +-- Very Long Query with Many Windows +SELECT id, val, + COUNT(*) OVER w1 as cnt1, + COUNT(*) OVER w2 as cnt2, + COUNT(*) OVER w3 as cnt3 +FROM rpr_stress +WINDOW w1 AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +), +w2 AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (B+) + DEFINE B AS val > 50 +), +w3 AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (C+) + DEFINE C AS val > 100 +) +ORDER BY id; + id | val | cnt1 | cnt2 | cnt3 +----+-----+------+------+------ + 1 | 10 | 20 | 0 | 0 + 2 | 20 | 0 | 0 | 0 + 3 | 30 | 0 | 0 | 0 + 4 | 40 | 0 | 0 | 0 + 5 | 50 | 0 | 0 | 0 + 6 | 60 | 0 | 15 | 0 + 7 | 70 | 0 | 0 | 0 + 8 | 80 | 0 | 0 | 0 + 9 | 90 | 0 | 0 | 0 + 10 | 100 | 0 | 0 | 0 + 11 | 110 | 0 | 0 | 10 + 12 | 120 | 0 | 0 | 0 + 13 | 130 | 0 | 0 | 0 + 14 | 140 | 0 | 0 | 0 + 15 | 150 | 0 | 0 | 0 + 16 | 160 | 0 | 0 | 0 + 17 | 170 | 0 | 0 | 0 + 18 | 180 | 0 | 0 | 0 + 19 | 190 | 0 | 0 | 0 + 20 | 200 | 0 | 0 | 0 +(20 rows) + +-- Deeply Nested Subqueries with RPR +SELECT * FROM ( + SELECT * FROM ( + SELECT * FROM ( + 
SELECT id, val, + COUNT(*) OVER w as cnt + FROM rpr_stress + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 + ) + ) sub1 + ) sub2 +) sub3 +WHERE cnt > 10 +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 20 +(1 row) + +-- Complex Expression in DEFINE Clause +SELECT id, val, + COUNT(*) OVER w as cnt +FROM rpr_stress +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+ B) + DEFINE A AS (val % 3 = 0 OR val % 5 = 0), + B AS (val * 2 > 100 AND val / 2 < 100) +) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 19 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 + 11 | 110 | 0 + 12 | 120 | 0 + 13 | 130 | 0 + 14 | 140 | 0 + 15 | 150 | 0 + 16 | 160 | 0 + 17 | 170 | 0 + 18 | 180 | 0 + 19 | 190 | 0 + 20 | 200 | 0 +(20 rows) + +-- Window with No Matching Rows +SELECT id, val, + COUNT(*) OVER w as cnt +FROM rpr_stress +WHERE val > 1000 -- No rows match +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- +(0 rows) + +-- Window on Single Row +SELECT id, val, + COUNT(*) OVER w as cnt +FROM rpr_stress +WHERE id = 10 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > 0 +) +ORDER BY id; + id | val | cnt +----+-----+----- + 10 | 100 | 1 +(1 row) + +DROP TABLE rpr_stress; +-- ============================================================ +-- Error Limit Tests +-- ============================================================ +-- Tests for error conditions in rpr.c +CREATE TABLE rpr_errors (id INT, val INT); +INSERT INTO rpr_errors VALUES (1, 10), (2, 20); +-- Test: DEFINE variable not in PATTERN (error) +SELECT id, val, COUNT(*) OVER w FROM rpr_errors +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 
UNBOUNDED FOLLOWING + PATTERN (A) + DEFINE + B AS TRUE +); +ERROR: DEFINE variable "b" is not used in PATTERN +LINE 7: B AS TRUE + ^ +-- Expected: Error - B is not used in PATTERN +-- Test: 251 variables in PATTERN and DEFINE (boundary - should succeed) +SELECT COUNT(*) OVER w FROM rpr_errors +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40 V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60 V61 V62 V63 V64 V65 V66 V67 V68 V69 V70 V71 V72 V73 V74 V75 V76 V77 V78 V79 V80 V81 V82 V83 V84 V85 V86 V87 V88 V89 V90 V91 V92 V93 V94 V95 V96 V97 V98 V99 V100 V101 V102 V103 V104 V105 V106 V107 V108 V109 V110 V111 V112 V113 V114 V115 V116 V117 V118 V119 V120 V121 V122 V123 V124 V125 V126 V127 V128 V129 V130 V131 V132 V133 V134 V135 V136 V137 V138 V139 V140 V141 V142 V143 V144 V145 V146 V147 V148 V149 V150 V151 V152 V153 V154 V155 V156 V157 V158 V159 V160 V161 V162 V163 V164 V165 V166 V167 V168 V169 V170 V171 V172 V173 V174 V175 V176 V177 V178 V179 V180 V181 V182 V183 V184 V185 V186 V187 V188 V189 V190 V191 V192 V193 V194 V195 V196 V197 V198 V199 V200 V201 V202 V203 V204 V205 V206 V207 V208 V209 V210 V211 V212 V213 V214 V215 V216 V217 V218 V219 V220 V221 V222 V223 V224 V225 V226 V227 V228 V229 V230 V231 V232 V233 V234 V235 V236 V237 V238 V239 V240 V241 V242 V243 V244 V245 V246 V247 V248 V249 V250 V251) + DEFINE + V1 AS val > 0, V2 AS val > 0, V3 AS val > 0, V4 AS val > 0, V5 AS val > 0, V6 AS val > 0, V7 AS val > 0, V8 AS val > 0, V9 AS val > 0, V10 AS val > 0, + V11 AS val > 0, V12 AS val > 0, V13 AS val > 0, V14 AS val > 0, V15 AS val > 0, V16 AS val > 0, V17 AS val > 0, V18 AS val > 0, V19 AS val > 0, V20 AS val > 0, + V21 AS val > 0, V22 AS val > 0, V23 AS val > 0, V24 AS val > 0, V25 AS val > 0, V26 AS val > 0, V27 AS val > 0, V28 AS val > 0, V29 AS val > 
0, V30 AS val > 0, + V31 AS val > 0, V32 AS val > 0, V33 AS val > 0, V34 AS val > 0, V35 AS val > 0, V36 AS val > 0, V37 AS val > 0, V38 AS val > 0, V39 AS val > 0, V40 AS val > 0, + V41 AS val > 0, V42 AS val > 0, V43 AS val > 0, V44 AS val > 0, V45 AS val > 0, V46 AS val > 0, V47 AS val > 0, V48 AS val > 0, V49 AS val > 0, V50 AS val > 0, + V51 AS val > 0, V52 AS val > 0, V53 AS val > 0, V54 AS val > 0, V55 AS val > 0, V56 AS val > 0, V57 AS val > 0, V58 AS val > 0, V59 AS val > 0, V60 AS val > 0, + V61 AS val > 0, V62 AS val > 0, V63 AS val > 0, V64 AS val > 0, V65 AS val > 0, V66 AS val > 0, V67 AS val > 0, V68 AS val > 0, V69 AS val > 0, V70 AS val > 0, + V71 AS val > 0, V72 AS val > 0, V73 AS val > 0, V74 AS val > 0, V75 AS val > 0, V76 AS val > 0, V77 AS val > 0, V78 AS val > 0, V79 AS val > 0, V80 AS val > 0, + V81 AS val > 0, V82 AS val > 0, V83 AS val > 0, V84 AS val > 0, V85 AS val > 0, V86 AS val > 0, V87 AS val > 0, V88 AS val > 0, V89 AS val > 0, V90 AS val > 0, + V91 AS val > 0, V92 AS val > 0, V93 AS val > 0, V94 AS val > 0, V95 AS val > 0, V96 AS val > 0, V97 AS val > 0, V98 AS val > 0, V99 AS val > 0, V100 AS val > 0, + V101 AS val > 0, V102 AS val > 0, V103 AS val > 0, V104 AS val > 0, V105 AS val > 0, V106 AS val > 0, V107 AS val > 0, V108 AS val > 0, V109 AS val > 0, V110 AS val > 0, + V111 AS val > 0, V112 AS val > 0, V113 AS val > 0, V114 AS val > 0, V115 AS val > 0, V116 AS val > 0, V117 AS val > 0, V118 AS val > 0, V119 AS val > 0, V120 AS val > 0, + V121 AS val > 0, V122 AS val > 0, V123 AS val > 0, V124 AS val > 0, V125 AS val > 0, V126 AS val > 0, V127 AS val > 0, V128 AS val > 0, V129 AS val > 0, V130 AS val > 0, + V131 AS val > 0, V132 AS val > 0, V133 AS val > 0, V134 AS val > 0, V135 AS val > 0, V136 AS val > 0, V137 AS val > 0, V138 AS val > 0, V139 AS val > 0, V140 AS val > 0, + V141 AS val > 0, V142 AS val > 0, V143 AS val > 0, V144 AS val > 0, V145 AS val > 0, V146 AS val > 0, V147 AS val > 0, V148 AS val > 0, V149 AS val > 0, 
V150 AS val > 0, + V151 AS val > 0, V152 AS val > 0, V153 AS val > 0, V154 AS val > 0, V155 AS val > 0, V156 AS val > 0, V157 AS val > 0, V158 AS val > 0, V159 AS val > 0, V160 AS val > 0, + V161 AS val > 0, V162 AS val > 0, V163 AS val > 0, V164 AS val > 0, V165 AS val > 0, V166 AS val > 0, V167 AS val > 0, V168 AS val > 0, V169 AS val > 0, V170 AS val > 0, + V171 AS val > 0, V172 AS val > 0, V173 AS val > 0, V174 AS val > 0, V175 AS val > 0, V176 AS val > 0, V177 AS val > 0, V178 AS val > 0, V179 AS val > 0, V180 AS val > 0, + V181 AS val > 0, V182 AS val > 0, V183 AS val > 0, V184 AS val > 0, V185 AS val > 0, V186 AS val > 0, V187 AS val > 0, V188 AS val > 0, V189 AS val > 0, V190 AS val > 0, + V191 AS val > 0, V192 AS val > 0, V193 AS val > 0, V194 AS val > 0, V195 AS val > 0, V196 AS val > 0, V197 AS val > 0, V198 AS val > 0, V199 AS val > 0, V200 AS val > 0, + V201 AS val > 0, V202 AS val > 0, V203 AS val > 0, V204 AS val > 0, V205 AS val > 0, V206 AS val > 0, V207 AS val > 0, V208 AS val > 0, V209 AS val > 0, V210 AS val > 0, + V211 AS val > 0, V212 AS val > 0, V213 AS val > 0, V214 AS val > 0, V215 AS val > 0, V216 AS val > 0, V217 AS val > 0, V218 AS val > 0, V219 AS val > 0, V220 AS val > 0, + V221 AS val > 0, V222 AS val > 0, V223 AS val > 0, V224 AS val > 0, V225 AS val > 0, V226 AS val > 0, V227 AS val > 0, V228 AS val > 0, V229 AS val > 0, V230 AS val > 0, + V231 AS val > 0, V232 AS val > 0, V233 AS val > 0, V234 AS val > 0, V235 AS val > 0, V236 AS val > 0, V237 AS val > 0, V238 AS val > 0, V239 AS val > 0, V240 AS val > 0, + V241 AS val > 0, V242 AS val > 0, V243 AS val > 0, V244 AS val > 0, V245 AS val > 0, V246 AS val > 0, V247 AS val > 0, V248 AS val > 0, V249 AS val > 0, V250 AS val > 0, + V251 AS val > 0 +); + count +------- + 0 + 0 +(2 rows) + +-- Expected: Success - exactly at RPR_VARID_MAX boundary +-- Test: 252 variables in PATTERN, 251 in DEFINE (exceeds limit with implicit TRUE) +SELECT COUNT(*) OVER w FROM rpr_errors +WINDOW w AS ( + 
ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40 V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60 V61 V62 V63 V64 V65 V66 V67 V68 V69 V70 V71 V72 V73 V74 V75 V76 V77 V78 V79 V80 V81 V82 V83 V84 V85 V86 V87 V88 V89 V90 V91 V92 V93 V94 V95 V96 V97 V98 V99 V100 V101 V102 V103 V104 V105 V106 V107 V108 V109 V110 V111 V112 V113 V114 V115 V116 V117 V118 V119 V120 V121 V122 V123 V124 V125 V126 V127 V128 V129 V130 V131 V132 V133 V134 V135 V136 V137 V138 V139 V140 V141 V142 V143 V144 V145 V146 V147 V148 V149 V150 V151 V152 V153 V154 V155 V156 V157 V158 V159 V160 V161 V162 V163 V164 V165 V166 V167 V168 V169 V170 V171 V172 V173 V174 V175 V176 V177 V178 V179 V180 V181 V182 V183 V184 V185 V186 V187 V188 V189 V190 V191 V192 V193 V194 V195 V196 V197 V198 V199 V200 V201 V202 V203 V204 V205 V206 V207 V208 V209 V210 V211 V212 V213 V214 V215 V216 V217 V218 V219 V220 V221 V222 V223 V224 V225 V226 V227 V228 V229 V230 V231 V232 V233 V234 V235 V236 V237 V238 V239 V240 V241 V242 V243 V244 V245 V246 V247 V248 V249 V250 V251 V252) + DEFINE + V1 AS val > 0, V2 AS val > 0, V3 AS val > 0, V4 AS val > 0, V5 AS val > 0, V6 AS val > 0, V7 AS val > 0, V8 AS val > 0, V9 AS val > 0, V10 AS val > 0, + V11 AS val > 0, V12 AS val > 0, V13 AS val > 0, V14 AS val > 0, V15 AS val > 0, V16 AS val > 0, V17 AS val > 0, V18 AS val > 0, V19 AS val > 0, V20 AS val > 0, + V21 AS val > 0, V22 AS val > 0, V23 AS val > 0, V24 AS val > 0, V25 AS val > 0, V26 AS val > 0, V27 AS val > 0, V28 AS val > 0, V29 AS val > 0, V30 AS val > 0, + V31 AS val > 0, V32 AS val > 0, V33 AS val > 0, V34 AS val > 0, V35 AS val > 0, V36 AS val > 0, V37 AS val > 0, V38 AS val > 0, V39 AS val > 0, V40 AS val > 0, + V41 AS val > 0, V42 AS val > 0, V43 AS val > 0, V44 AS val > 0, V45 AS val > 0, V46 AS val > 0, V47 AS val > 0, V48 AS val 
> 0, V49 AS val > 0, V50 AS val > 0, + V51 AS val > 0, V52 AS val > 0, V53 AS val > 0, V54 AS val > 0, V55 AS val > 0, V56 AS val > 0, V57 AS val > 0, V58 AS val > 0, V59 AS val > 0, V60 AS val > 0, + V61 AS val > 0, V62 AS val > 0, V63 AS val > 0, V64 AS val > 0, V65 AS val > 0, V66 AS val > 0, V67 AS val > 0, V68 AS val > 0, V69 AS val > 0, V70 AS val > 0, + V71 AS val > 0, V72 AS val > 0, V73 AS val > 0, V74 AS val > 0, V75 AS val > 0, V76 AS val > 0, V77 AS val > 0, V78 AS val > 0, V79 AS val > 0, V80 AS val > 0, + V81 AS val > 0, V82 AS val > 0, V83 AS val > 0, V84 AS val > 0, V85 AS val > 0, V86 AS val > 0, V87 AS val > 0, V88 AS val > 0, V89 AS val > 0, V90 AS val > 0, + V91 AS val > 0, V92 AS val > 0, V93 AS val > 0, V94 AS val > 0, V95 AS val > 0, V96 AS val > 0, V97 AS val > 0, V98 AS val > 0, V99 AS val > 0, V100 AS val > 0, + V101 AS val > 0, V102 AS val > 0, V103 AS val > 0, V104 AS val > 0, V105 AS val > 0, V106 AS val > 0, V107 AS val > 0, V108 AS val > 0, V109 AS val > 0, V110 AS val > 0, + V111 AS val > 0, V112 AS val > 0, V113 AS val > 0, V114 AS val > 0, V115 AS val > 0, V116 AS val > 0, V117 AS val > 0, V118 AS val > 0, V119 AS val > 0, V120 AS val > 0, + V121 AS val > 0, V122 AS val > 0, V123 AS val > 0, V124 AS val > 0, V125 AS val > 0, V126 AS val > 0, V127 AS val > 0, V128 AS val > 0, V129 AS val > 0, V130 AS val > 0, + V131 AS val > 0, V132 AS val > 0, V133 AS val > 0, V134 AS val > 0, V135 AS val > 0, V136 AS val > 0, V137 AS val > 0, V138 AS val > 0, V139 AS val > 0, V140 AS val > 0, + V141 AS val > 0, V142 AS val > 0, V143 AS val > 0, V144 AS val > 0, V145 AS val > 0, V146 AS val > 0, V147 AS val > 0, V148 AS val > 0, V149 AS val > 0, V150 AS val > 0, + V151 AS val > 0, V152 AS val > 0, V153 AS val > 0, V154 AS val > 0, V155 AS val > 0, V156 AS val > 0, V157 AS val > 0, V158 AS val > 0, V159 AS val > 0, V160 AS val > 0, + V161 AS val > 0, V162 AS val > 0, V163 AS val > 0, V164 AS val > 0, V165 AS val > 0, V166 AS val > 0, V167 AS val > 
0, V168 AS val > 0, V169 AS val > 0, V170 AS val > 0, + V171 AS val > 0, V172 AS val > 0, V173 AS val > 0, V174 AS val > 0, V175 AS val > 0, V176 AS val > 0, V177 AS val > 0, V178 AS val > 0, V179 AS val > 0, V180 AS val > 0, + V181 AS val > 0, V182 AS val > 0, V183 AS val > 0, V184 AS val > 0, V185 AS val > 0, V186 AS val > 0, V187 AS val > 0, V188 AS val > 0, V189 AS val > 0, V190 AS val > 0, + V191 AS val > 0, V192 AS val > 0, V193 AS val > 0, V194 AS val > 0, V195 AS val > 0, V196 AS val > 0, V197 AS val > 0, V198 AS val > 0, V199 AS val > 0, V200 AS val > 0, + V201 AS val > 0, V202 AS val > 0, V203 AS val > 0, V204 AS val > 0, V205 AS val > 0, V206 AS val > 0, V207 AS val > 0, V208 AS val > 0, V209 AS val > 0, V210 AS val > 0, + V211 AS val > 0, V212 AS val > 0, V213 AS val > 0, V214 AS val > 0, V215 AS val > 0, V216 AS val > 0, V217 AS val > 0, V218 AS val > 0, V219 AS val > 0, V220 AS val > 0, + V221 AS val > 0, V222 AS val > 0, V223 AS val > 0, V224 AS val > 0, V225 AS val > 0, V226 AS val > 0, V227 AS val > 0, V228 AS val > 0, V229 AS val > 0, V230 AS val > 0, + V231 AS val > 0, V232 AS val > 0, V233 AS val > 0, V234 AS val > 0, V235 AS val > 0, V236 AS val > 0, V237 AS val > 0, V238 AS val > 0, V239 AS val > 0, V240 AS val > 0, + V241 AS val > 0, V242 AS val > 0, V243 AS val > 0, V244 AS val > 0, V245 AS val > 0, V246 AS val > 0, V247 AS val > 0, V248 AS val > 0, V249 AS val > 0, V250 AS val > 0, + V251 AS val > 0 +); +ERROR: too many pattern variables +DETAIL: Maximum is 251. 
+-- Expected: ERROR - too many pattern variables (Maximum is 251) +-- Test: Pattern nesting at maximum depth (depth 253) +-- Note: 253 nested GROUP{3,7} quantifiers produce depth 253 after optimization +SELECT id, val, COUNT(*) OVER w FROM rpr_errors +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((A{3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7})
{3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}) + DEFINE A AS val > 0 +); + id | val | count +----+-----+------- + 1 | 10 | 0 + 2 | 20 | 0 +(2 rows) + +-- Expected: Should succeed +-- Test: Pattern nesting depth exceeds maximum (depth 254) +-- Note: 254 nested GROUP{3,7} quantifiers produce depth 254 after optimization +SELECT id, val, COUNT(*) OVER w FROM rpr_errors +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((A{3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}
){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}) + DEFINE A AS val > 0 +); +ERROR: pattern nesting too deep +DETAIL: Pattern nesting depth 254 exceeds maximum 253. +-- Expected: ERROR - pattern nesting too deep +DROP TABLE rpr_errors; +-- ============================================================ +-- Jacob's Patterns +-- ============================================================ +-- Basic pattern matching tests from jacob branch +-- Test: A? (optional, greedy) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A?) + DEFINE A AS val > 50 +); + id | val | c +----+-----+--- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 1 + 7 | 70 | 1 + 8 | 80 | 1 + 9 | 90 | 1 + 10 | 100 | 1 +(10 rows) + +-- Test: A{2} (exact count) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2}) + DEFINE A AS val <= 50 +); + id | val | c +----+-----+--- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 30 | 2 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Test: A{1,3} (bounded range, greedy) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{1,3}) + DEFINE A AS val <= 50 +); + id | val | c +----+-----+--- + 1 | 10 | 3 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 2 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Test: A | B (simple 
alternation) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A | B) + DEFINE A AS val <= 30, B AS val > 70 +); + id | val | c +----+-----+--- + 1 | 10 | 1 + 2 | 20 | 1 + 3 | 30 | 1 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 1 + 9 | 90 | 1 + 10 | 100 | 1 +(10 rows) + +-- Test: A | B | C (three-way alternation) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A | B | C) + DEFINE A AS val <= 20, B AS val BETWEEN 40 AND 60, C AS val > 80 +); + id | val | c +----+-----+--- + 1 | 10 | 1 + 2 | 20 | 1 + 3 | 30 | 0 + 4 | 40 | 1 + 5 | 50 | 1 + 6 | 60 | 1 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 1 + 10 | 100 | 1 +(10 rows) + +-- Test: A B C (concatenation) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS val <= 30, B AS val BETWEEN 31 AND 60, C AS val > 60 +); + id | val | c +----+-----+--- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Test: A B? C (optional middle) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B? 
C) + DEFINE A AS val <= 30, B AS val BETWEEN 31 AND 60, C AS val > 60 +); + id | val | c +----+-----+--- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Test: (A B)+ (grouped quantifier) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+) + DEFINE A AS val <= 50, B AS val > 50 +); + id | val | c +----+-----+--- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 2 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Test: (A | B)+ C (alternation with quantifier) +SELECT id, val, count(*) OVER w AS c +FROM rpr_plan +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B)+ C) + DEFINE A AS val <= 30, B AS val BETWEEN 31 AND 60, C AS val > 80 +); + id | val | c +----+-----+--- + 1 | 10 | 0 + 2 | 20 | 0 + 3 | 30 | 0 + 4 | 40 | 0 + 5 | 50 | 0 + 6 | 60 | 0 + 7 | 70 | 0 + 8 | 80 | 0 + 9 | 90 | 0 + 10 | 100 | 0 +(10 rows) + +-- Test: (A+ | (A | B)+)* - nested alternation inside quantified group +-- Previously caused infinite recursion in nfa_advance_alt when the inner +-- BEGIN(+)'s skip jump was followed as an ALT branch pointer. 
+SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM (VALUES + (1, ARRAY['A', 'B']), + (2, ARRAY['B']), + (3, ARRAY['C']) +) AS t(id, flags) +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A+ | (A | B)+)*) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 2 + 2 | {B} | | + 3 | {C} | | +(3 rows) + +-- ============================================================ +-- Pathological Patterns +-- ============================================================ +-- These patterns previously caused issues. Now optimized or handled safely. +-- Test: (A*)* - nested unbounded (optimized to A*) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((A*)*) + DEFINE A AS TRUE +); + v | c +---+--- + 1 | 5 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 +(5 rows) + +-- Test: (A*)+ - inner nullable (optimized to A*) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((A*)+) + DEFINE A AS TRUE +); + v | c +---+--- + 1 | 5 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 +(5 rows) + +-- Test: (A+)* - outer nullable (optimized to A*) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((A+)*) + DEFINE A AS TRUE +); + v | c +---+--- + 1 | 5 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 +(5 rows) + +-- Test: (A+)+ - both require match (optimized to A+) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 5) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP 
PAST LAST ROW + INITIAL + PATTERN ((A+)+) + DEFINE A AS TRUE +); + v | c +---+--- + 1 | 5 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 +(5 rows) + +-- Test: (((A)*)*)* - triple nested (optimized to A*) +SELECT v, count(*) OVER w AS c +FROM (SELECT generate_series(1, 3) v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ((((A)*)*)*) + DEFINE A AS TRUE +); + v | c +---+--- + 1 | 3 + 2 | 0 + 3 | 0 +(3 rows) + +-- Optional group with alternation: A ((B | C) (D | E))* F? +-- When only A matches, the * group matches 0 times and F? matches 0 times +SELECT id, val, match_len +FROM (SELECT id, val, + COUNT(*) OVER w AS match_len + FROM (VALUES (1, 1), (2, 99)) AS t(id, val) + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A ((B | C) (D | E))* F?) + DEFINE A AS val = 1, + B AS val = 2, C AS val = 3, + D AS val = 4, E AS val = 5, + F AS val = 6 + ) +) s; + id | val | match_len +----+-----+----------- + 1 | 1 | 1 + 2 | 99 | 0 +(2 rows) + +DROP TABLE rpr_plan; diff --git a/src/test/regress/expected/rpr_explain.out b/src/test/regress/expected/rpr_explain.out new file mode 100644 index 00000000000..dc3522f930f --- /dev/null +++ b/src/test/regress/expected/rpr_explain.out @@ -0,0 +1,4885 @@ +-- ============================================================ +-- RPR EXPLAIN Tests +-- Tests for Row Pattern Recognition EXPLAIN output +-- ============================================================ +-- +-- Views and tables in this file are intentionally not dropped, +-- so that pg_upgrade/pg_dump can test RPR syntax serialization. 
+-- +-- This test suite validates EXPLAIN output for RPR queries, +-- including NFA statistics shown in EXPLAIN ANALYZE: +-- - NFA States: peak, total, merged +-- - NFA Contexts: peak, total, pruned +-- - NFA: matched (len min/max/avg), mismatched (len min/max/avg) +-- - NFA: absorbed (len min/max/avg), skipped (len min/max/avg) +-- - Pattern deparse formatting +-- - Multiple output formats (text, JSON, XML) +-- +-- Test Coverage: +-- Basic NFA Statistics Tests +-- State Statistics Tests +-- Context Statistics Tests +-- Match Length Statistics Tests +-- Mismatch Length Statistics Tests +-- JSON Format Tests +-- XML Format Tests +-- Multiple Partitions Tests +-- Edge Cases +-- Complex Pattern Tests +-- Real-world Pattern Examples +-- Performance-oriented Tests +-- INITIAL vs no INITIAL comparison +-- Quantifier Variations +-- Regression Tests for Statistics Accuracy +-- Alternation Pattern Tests +-- Group Pattern Tests +-- Window Function Combinations +-- DEFINE Expression Variations +-- Large Scale Statistics Verification +-- Nav Mark Lookback/Lookahead (tuplestore trim) +-- ============================================================ +-- Filter function to normalize platform-dependent memory values (not NFA statistics). +-- NFA statistics should not change between platforms; if they do, it could +-- indicate issues such as uninitialized memory access. +-- Works for text, JSON, and XML formats. 
+create function rpr_explain_filter(text) returns setof text
+language plpgsql as
+$$
+-- Executes the statement text passed as $1 and returns its output one line
+-- at a time, masking only the platform-dependent memory figures so the
+-- remaining numbers can be compared verbatim.
+declare
+    ln text;
+begin
+    for ln in execute $1
+    loop
+        -- Normalize platform-dependent memory values
+        -- Keep NFA statistics numbers unchanged (they are test assertions)
+
+        -- Text format: "Storage: Memory Maximum Storage: 18kB"
+        if ln ~ 'Storage:.*Maximum Storage:' then
+            ln := regexp_replace(ln, '\m\d+kB', 'NkB', 'g');
+        end if;
+
+        -- JSON format: "Maximum Storage": 17 (number in kB units)
+        if ln ~ '"Maximum Storage":' then
+            ln := regexp_replace(ln, '"Maximum Storage": \d+', '"Maximum Storage": 0', 'g');
+        end if;
+
+        -- XML format: <Maximum-Storage>17</Maximum-Storage> (number in kB units)
+        -- The guard and the pattern must both name the tag: an empty regex
+        -- matches every line, and a bare '\d+' would zero the NFA statistics.
+        if ln ~ '<Maximum-Storage>' then
+            ln := regexp_replace(ln, '<Maximum-Storage>\d+</Maximum-Storage>', '<Maximum-Storage>0</Maximum-Storage>', 'g');
+        end if;
+
+        -- Sort Method memory is platform-dependent (32-bit vs 64-bit)
+        if ln ~ 'Sort Method:.*Memory:' then
+            ln := regexp_replace(ln, 'Memory: \d+kB', 'Memory: NkB');
+        end if;
+
+        return next ln;
+    end loop;
+end;
+$$;
+-- Setup: Create test tables
+CREATE TABLE rpr_nfa_test (
+    id serial,
+    v int,
+    cat char(1)
+);
+-- Insert test data: 100 rows with predictable pattern
+INSERT INTO rpr_nfa_test (v, cat)
+SELECT i,
+    CASE
+        WHEN i % 5 = 1 THEN 'A'
+        WHEN i % 5 = 2 THEN 'B'
+        WHEN i % 5 = 3 THEN 'C'
+        WHEN i % 5 = 4 THEN 'D'
+        ELSE 'E'
+    END
+FROM generate_series(1, 100) i;
+-- Additional test table with more complex patterns
+CREATE TABLE rpr_nfa_complex (
+    id serial,
+    price int,
+    trend char(1) -- U=up, D=down, S=stable
+);
+INSERT INTO rpr_nfa_complex (price, trend)
+VALUES
+    (100, 'S'), (105, 'U'), (110, 'U'), (108, 'D'), (112, 'U'),
+    (115, 'U'), (113, 'D'), (111, 'D'), (109, 'D'), (110, 'U'),
+    (120, 'U'), (125, 'U'), (130, 'U'), (128, 'D'), (126, 'D'),
+    (124, 'D'), (122, 'D'), (120, 'D'), (118, 'D'), (119, 'U'),
+    (121, 'U'), (123, 'U'), (125, 'U'), (127, 'U'), (129, 'U'),
+    (131, 'U'), (133, 'U'), (130, 'D'), (127, 'D'), (124, 'D');
+-- ============================================================
+-- Basic NFA Statistics Tests
+-- 
============================================================ +-- Simple pattern - should show basic statistics +CREATE VIEW rpr_ev_basic_simple AS +SELECT count(*) OVER w +FROM rpr_nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS cat = 'A', B AS cat = 'B' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_basic_simple'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------ + PATTERN (a b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM rpr_nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS cat = ''A'', B AS cat = ''B'' +)'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 2 peak, 101 total, 0 merged + NFA Contexts: 2 peak, 101 total, 60 pruned + NFA: 20 matched (len 2/2/2.0), 0 mismatched + NFA: 0 absorbed, 20 skipped (len 1/1/1.0) + -> Seq Scan on rpr_nfa_test (actual rows=100.00 loops=1) +(10 rows) + +-- Pattern with no matches - 0 matched +CREATE VIEW rpr_ev_basic_nomatch AS +SELECT count(*) OVER w +FROM rpr_nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (X Y Z) + DEFINE X AS cat = 'X', Y AS cat = 'Y', Z AS cat = 'Z' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_basic_nomatch'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------- + PATTERN (x y z) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM rpr_nfa_test +WINDOW w AS ( + ROWS BETWEEN 
CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (X Y Z) + DEFINE X AS cat = ''X'', Y AS cat = ''Y'', Z AS cat = ''Z'' +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: x y z + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 1 peak, 101 total, 0 merged + NFA Contexts: 2 peak, 101 total, 100 pruned + NFA: 0 matched, 0 mismatched + -> Seq Scan on rpr_nfa_test (actual rows=100.00 loops=1) +(9 rows) + +-- Pattern matching every row - high match count +CREATE VIEW rpr_ev_basic_allrows AS +SELECT count(*) OVER w +FROM rpr_nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (R) + DEFINE R AS TRUE +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_basic_allrows'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------- + PATTERN (r) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM rpr_nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (R) + DEFINE R AS TRUE +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: r + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 2 peak, 101 total, 0 merged + NFA Contexts: 2 peak, 101 total, 0 pruned + NFA: 100 matched (len 1/1/1.0), 0 mismatched + -> Seq Scan on rpr_nfa_test (actual rows=100.00 loops=1) +(9 rows) + +-- Regression test: Space before parenthesis in pattern deparse +-- Verifies that "A (B | C)" correctly outputs as "a (b | c)" with space +CREATE VIEW rpr_ev_basic_deparse_space AS 
+SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A (B | C)) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_basic_deparse_space'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------ + PATTERN (a (b | c)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A (B | C)) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=20.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a (b | c) + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 35 total, 0 merged + NFA Contexts: 2 peak, 21 total, 6 pruned + NFA: 7 matched (len 2/2/2.0), 0 mismatched + NFA: 0 absorbed, 7 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=20.00 loops=1) +(10 rows) + +-- Regression test: Sequential alternations at same depth +-- Verifies that "((B | C) (D | E))" correctly outputs as "(b | c) (d | e)" +-- Previously failed due to missing parentheses on ALT depth decrease +CREATE VIEW rpr_ev_basic_deparse_seqalt AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B | C) (D | E))*) + DEFINE A AS v % 5 = 1, B AS v % 5 = 2, C AS v % 5 = 3, D AS v % 5 = 4, E AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_basic_deparse_seqalt'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------------------- + PATTERN (a ((b | c) (d | e))*) +(1 row) + +SELECT rpr_explain_filter(' 
+EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B | C) (D | E))*) + DEFINE A AS v % 5 = 1, B AS v % 5 = 2, C AS v % 5 = 3, D AS v % 5 = 4, E AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a ((b | c) (d | e))* + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 61 total, 0 merged + NFA Contexts: 3 peak, 31 total, 24 pruned + NFA: 6 matched (len 1/1/1.0), 0 mismatched + -> Function Scan on generate_series s (actual rows=30.00 loops=1) +(9 rows) + +-- Regression test: Quoted identifiers in EXPLAIN pattern deparse +-- Mixed case names must be quoted to preserve round-trip safety +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 10) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ("Start" "Up"+) + DEFINE "Start" AS TRUE, "Up" AS v > PREV(v) +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: "Start" "Up"+ + Nav Mark Lookback: 1 + -> Function Scan on generate_series s +(5 rows) + +-- ============================================================ +-- State Statistics Tests (peak, total, merged) +-- ============================================================ +-- Simple quantifier pattern - A+ with short matches (no merging) +CREATE VIEW rpr_ev_state_simple_quant AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS v % 2 = 1 +); +SELECT line FROM 
unnest(string_to_array(pg_get_viewdef('rpr_ev_state_simple_quant'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------- + PATTERN (a+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS v % 2 = 1 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 76 total, 0 merged + NFA Contexts: 3 peak, 51 total, 25 pruned + NFA: 25 matched (len 1/1/1.0), 0 mismatched + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(9 rows) + +-- Alternation pattern - multiple state branches +CREATE VIEW rpr_ev_state_alt AS +SELECT count(*) OVER w +FROM rpr_nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C) (D | E)) + DEFINE + A AS cat = 'A', B AS cat = 'B', C AS cat = 'C', + D AS cat = 'D', E AS cat = 'E' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_state_alt'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------------------- + PATTERN ((a | b | c) (d | e)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM rpr_nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C) (D | E)) + DEFINE + A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'', + D AS cat = ''D'', E AS cat = ''E'' +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg (actual 
rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b | c) (d | e) + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 6 peak, 524 total, 0 merged + NFA Contexts: 3 peak, 101 total, 20 pruned + NFA: 20 matched (len 2/2/2.0), 40 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 20 skipped (len 1/1/1.0) + -> Seq Scan on rpr_nfa_test (actual rows=100.00 loops=1) +(10 rows) + +-- Complex pattern with high state count +CREATE VIEW rpr_ev_state_complex AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B* C+) + DEFINE + A AS v % 3 = 1, + B AS v % 3 = 2, + C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_state_complex'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------- + PATTERN (a+ b* c+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B* C+) + DEFINE + A AS v % 3 = 1, + B AS v % 3 = 2, + C AS v % 3 = 0 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b* c+ + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 235 total, 0 merged + NFA Contexts: 3 peak, 101 total, 34 pruned + NFA: 33 matched (len 3/3/3.0), 0 mismatched + NFA: 0 absorbed, 33 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(10 rows) + +-- Grouped pattern with quantifier - state count with grouping +CREATE VIEW rpr_ev_state_group_quant AS +SELECT count(*) OVER w +FROM 
generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_state_group_quant'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN ((a b)+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b')+" + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 91 total, 0 merged + NFA Contexts: 3 peak, 61 total, 0 pruned + NFA: 1 matched (len 60/60/60.0), 0 mismatched + NFA: 29 absorbed (len 2/2/2.0), 30 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(10 rows) + +-- State explosion pattern - many alternations +-- Pattern (A|B)(A|B)(A|B)(A|B) can create many parallel states +CREATE VIEW rpr_ev_state_explosion AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B)) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_state_explosion'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------------------------------------------------------------ + PATTERN ((a | b) (a | b) (a | b) (a | b) (a | b) (a | b) (a | b) (a | b)) +(1 row) + +SELECT 
rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B)) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b){8} + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 17 peak, 995 total, 0 merged + NFA Contexts: 8 peak, 101 total, 1 pruned + NFA: 12 matched (len 8/8/8.0), 3 mismatched (len 2/4/3.0) + NFA: 0 absorbed, 84 skipped (len 1/7/4.0) + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(10 rows) + +-- Consecutive ALT merge followed by different ALT +-- Tests mergeConsecutiveAlts flush on ALT change: (A|B){2} (C|D) +CREATE VIEW rpr_ev_state_alt_merge_alt AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (C | D)) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_state_alt_merge_alt'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------------------- + PATTERN ((a | b) (a | b) (c | d)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (C | D)) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +);'); + rpr_explain_filter 
+---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b){2} (c | d) + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 7 peak, 181 total, 0 merged + NFA Contexts: 3 peak, 41 total, 12 pruned + NFA: 9 matched (len 3/3/3.0), 1 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 18 skipped (len 1/2/1.5) + -> Function Scan on generate_series s (actual rows=40.00 loops=1) +(10 rows) + +-- Consecutive ALT merge followed by non-ALT element +-- Tests mergeConsecutiveAlts flush on non-ALT: (A|B){2} c +CREATE VIEW rpr_ev_state_alt_merge_nonalt AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_state_alt_merge_nonalt'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------------- + PATTERN ((a | b) (a | b) c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b){2} c + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 6 peak, 177 total, 0 merged + NFA Contexts: 3 peak, 41 total, 2 pruned + NFA: 12 matched (len 3/3/3.0), 2 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 24 skipped (len 1/2/1.5) + -> Function 
Scan on generate_series s (actual rows=40.00 loops=1) +(10 rows) + +-- ALT prefix/suffix absorbed into GROUP: (A|B) (A|B)+ (A|B) -> (A|B){3,} +CREATE VIEW rpr_ev_state_alt_absorb_group AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B)+ (A | B)) + DEFINE A AS v % 2 = 0, B AS v % 2 = 1 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_state_alt_absorb_group'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------------------------- + PATTERN ((a | b) (a | b)+ (a | b)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B)+ (A | B)) + DEFINE A AS v % 2 = 0, B AS v % 2 = 1 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b){3,} + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 7 peak, 243 total, 0 merged + NFA Contexts: 3 peak, 41 total, 0 pruned + NFA: 1 matched (len 40/40/40.0), 0 mismatched + NFA: 0 absorbed, 39 skipped (len 1/2/1.0) + -> Function Scan on generate_series s (actual rows=40.00 loops=1) +(10 rows) + +-- High state count - alternation with plus quantifier +CREATE VIEW rpr_ev_state_alt_plus AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C)+ D) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3, D AS v % 4 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_state_alt_plus'), E'\n')) AS line WHERE 
line ~ 'PATTERN'; + line +----------------------------- + PATTERN ((a | b | c)+ d) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C)+ D) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3, D AS v % 4 = 0 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b | c)+ d + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 16 peak, 1004 total, 0 merged + NFA Contexts: 4 peak, 101 total, 0 pruned + NFA: 25 matched (len 4/4/4.0), 0 mismatched + NFA: 0 absorbed, 75 skipped (len 1/3/2.0) + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(10 rows) + +-- Early termination: first ALT branch (A) reaches FIN immediately, +-- pruning second branch (A B+) before it can accumulate B repetitions. 
+CREATE VIEW rpr_ev_state_alt_prune AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | A B)+) + DEFINE A AS v = 1, B AS v > 1 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_state_alt_prune'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------- + PATTERN ((a | a b)+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | A B)+) + DEFINE A AS v = 1, B AS v > 1 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | a b)+ + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 6 peak, 306 total, 0 merged + NFA Contexts: 3 peak, 101 total, 99 pruned + NFA: 1 matched (len 1/1/1.0), 0 mismatched + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(9 rows) + +-- Nested quantifiers causing state growth +CREATE VIEW rpr_ev_state_nested_quant AS +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A | B)+)+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_state_nested_quant'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------- + PATTERN (((a | b)+)+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED 
FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A | B)+)+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2 +);'); + rpr_explain_filter +------------------------------------------------------------------------ + WindowAgg (actual rows=1000.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b)+ + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 6 peak, 5004 total, 0 merged + NFA Contexts: 3 peak, 1001 total, 333 pruned + NFA: 334 matched (len 1/2/2.0), 0 mismatched + NFA: 0 absorbed, 333 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=1000.00 loops=1) +(10 rows) + +-- ============================================================ +-- Context Statistics Tests (peak, total, pruned + absorbed/skipped) +-- ============================================================ +-- Context absorption with unbounded quantifier at start +CREATE VIEW rpr_ev_ctx_absorb_unbounded AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_ctx_absorb_unbounded'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 91 
total, 0 merged + NFA Contexts: 2 peak, 51 total, 0 pruned + NFA: 10 matched (len 5/5/5.0), 0 mismatched + NFA: 30 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(10 rows) + +-- No absorption - bounded quantifier +CREATE VIEW rpr_ev_ctx_no_absorb AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,4} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_ctx_no_absorb'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------- + PATTERN (a{2,4} b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,4} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{2,4} b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 7 peak, 101 total, 0 merged + NFA Contexts: 5 peak, 51 total, 0 pruned + NFA: 10 matched (len 5/5/5.0), 0 mismatched + NFA: 0 absorbed, 40 skipped (len 1/4/2.5) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(10 rows) + +-- Contexts skipped by SKIP PAST LAST ROW +CREATE VIEW rpr_ev_ctx_skip AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v % 10 = 1, B AS v % 10 = 2, C AS v % 10 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_ctx_skip'), E'\n')) AS 
line WHERE line ~ 'PATTERN'; + line +-------------------- + PATTERN (a b c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v % 10 = 1, B AS v % 10 = 2, C AS v % 10 = 3 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b c + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 2 peak, 101 total, 0 merged + NFA Contexts: 3 peak, 101 total, 80 pruned + NFA: 10 matched (len 3/3/3.0), 0 mismatched + NFA: 0 absorbed, 10 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(10 rows) + +-- High context absorption - unbounded group +CREATE VIEW rpr_ev_ctx_absorb_group AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_ctx_absorb_group'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------- + PATTERN ((a b)+ c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT 
ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b')+" c + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 134 total, 0 merged + NFA Contexts: 3 peak, 101 total, 34 pruned + NFA: 33 matched (len 3/3/3.0), 0 mismatched + NFA: 0 absorbed, 33 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(10 rows) + +-- Fixed-length group absorption: (A B B)+ C +-- B B merged to B{2}; absorbable with fixed-length check +-- step_size=3 (A + B + B); v % 7 cycle gives 2 iterations per match +CREATE VIEW rpr_ev_ctx_absorb_fixedvar AS +SELECT count(*) OVER w +FROM generate_series(1, 70) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B B)+ C) + DEFINE A AS v % 7 IN (1, 4), B AS v % 7 IN (2, 3, 5, 6), C AS v % 7 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_ctx_absorb_fixedvar'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------- + PATTERN ((a b b)+ c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 70) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B B)+ C) + DEFINE A AS v % 7 IN (1, 4), B AS v % 7 IN (2, 3, 5, 6), C AS v % 7 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=70.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b{2}')+" c + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 91 total, 0 merged + NFA Contexts: 4 peak, 71 total, 40 pruned + NFA: 10 matched (len 7/7/7.0), 0 mismatched + NFA: 10 absorbed (len 3/3/3.0), 10 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=70.00 loops=1) +(10 rows) + +-- Nested fixed-length group 
absorption: (A (B C){2} D)+ E +-- step_size = 1 + (1+1)*2 + 1 = 6; v % 13 cycle gives 2 iterations + E +CREATE VIEW rpr_ev_ctx_absorb_nested AS +SELECT count(*) OVER w +FROM generate_series(1, 65) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A (B C){2} D)+ E) + DEFINE A AS v % 13 IN (1, 7), B AS v % 13 IN (2, 4, 8, 10), + C AS v % 13 IN (3, 5, 9, 11), D AS v % 13 IN (6, 12), + E AS v % 13 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_ctx_absorb_nested'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------------- + PATTERN ((a (b c){2} d)+ e) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 65) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A (B C){2} D)+ E) + DEFINE A AS v % 13 IN (1, 7), B AS v % 13 IN (2, 4, 8, 10), + C AS v % 13 IN (3, 5, 9, 11), D AS v % 13 IN (6, 12), + E AS v % 13 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=65.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' (b' c'){2}' d')+" e + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 76 total, 0 merged + NFA Contexts: 4 peak, 66 total, 50 pruned + NFA: 5 matched (len 13/13/13.0), 0 mismatched + NFA: 5 absorbed (len 6/6/6.0), 5 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=65.00 loops=1) +(10 rows) + +-- Doubly nested fixed-length group absorption: (A ((B C{3}){2} D){2} E)+ F +-- step_size = 1 + ((1+3)*2+1)*2 + 1 = 20; v % 41 cycle gives 2 iterations + F +CREATE VIEW rpr_ev_ctx_absorb_deep AS +SELECT count(*) OVER w +FROM generate_series(1, 82) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED 
FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A ((B C C C){2} D){2} E)+ F) + DEFINE A AS v % 41 IN (1, 21), + B AS v % 41 IN (2, 6, 11, 15, 22, 26, 31, 35), + C AS v % 41 IN (3,4,5, 7,8,9, 12,13,14, 16,17,18, + 23,24,25, 27,28,29, 32,33,34, 36,37,38), + D AS v % 41 IN (10, 19, 30, 39), + E AS v % 41 IN (20, 40), + F AS v % 41 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_ctx_absorb_deep'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------------------------- + PATTERN ((a ((b c c c){2} d){2} e)+ f) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 82) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A ((B C C C){2} D){2} E)+ F) + DEFINE A AS v % 41 IN (1, 21), + B AS v % 41 IN (2, 6, 11, 15, 22, 26, 31, 35), + C AS v % 41 IN (3,4,5, 7,8,9, 12,13,14, 16,17,18, + 23,24,25, 27,28,29, 32,33,34, 36,37,38), + D AS v % 41 IN (10, 19, 30, 39), + E AS v % 41 IN (20, 40), + F AS v % 41 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=82.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' ((b' c{3}'){2}' d'){2}' e')+" f + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 87 total, 0 merged + NFA Contexts: 4 peak, 83 total, 76 pruned + NFA: 2 matched (len 41/41/41.0), 0 mismatched + NFA: 2 absorbed (len 20/20/20.0), 2 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=82.00 loops=1) +(10 rows) + +-- 3-level END chain absorption: ((A (B C){2}){2})+ +-- step_size = (1 + (1+1)*2) * 2 = 10; v % 21 cycle gives 2 iterations +-- END chain: END(BC{2}) -> END(A..{2}) -> END(+, ABSORBABLE) +CREATE VIEW rpr_ev_ctx_absorb_endchain AS +SELECT count(*) OVER w +FROM generate_series(1, 42) 
AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A (B C){2}){2})+) + DEFINE A AS v % 21 IN (1, 6, 11, 16), + B AS v % 21 IN (2, 4, 7, 9, 12, 14, 17, 19), + C AS v % 21 IN (3, 5, 8, 10, 13, 15, 18, 20) +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_ctx_absorb_endchain'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------------------- + PATTERN (((a (b c){2}){2})+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 42) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A (B C){2}){2})+) + DEFINE A AS v % 21 IN (1, 6, 11, 16), + B AS v % 21 IN (2, 4, 7, 9, 12, 14, 17, 19), + C AS v % 21 IN (3, 5, 8, 10, 13, 15, 18, 20) +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=42.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: ((a' (b' c'){2}'){2}')+" + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 47 total, 0 merged + NFA Contexts: 5 peak, 43 total, 30 pruned + NFA: 2 matched (len 20/20/20.0), 0 mismatched + NFA: 2 absorbed (len 10/10/10.0), 8 skipped (len 1/5/3.0) + -> Function Scan on generate_series s (actual rows=42.00 loops=1) +(10 rows) + +-- No absorption when DEFINE uses FIRST (match_start-dependent) +-- Same pattern as rpr_ev_ctx_absorb_unbounded but with FIRST in DEFINE. +-- Compare: absorbed count should be 0 here vs >0 above. 
+CREATE VIEW rpr_ev_ctx_no_absorb_first AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 AND v > FIRST(v) +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_ctx_no_absorb_first'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 AND v > FIRST(v) +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+ b + Nav Mark Lookback: 0 + Nav Mark Lookahead: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 9 peak, 151 total, 0 merged + NFA Contexts: 5 peak, 51 total, 0 pruned + NFA: 10 matched (len 5/5/5.0), 0 mismatched + NFA: 0 absorbed, 40 skipped (len 1/4/2.5) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(11 rows) + +-- Absorption preserved when DEFINE uses only LAST without offset +-- LAST(v) is match_start-independent (always currentpos), so absorption +-- remains active. Compare: absorbed count should be >0, like the +-- PREV-only case above. 
+CREATE VIEW rpr_ev_ctx_absorb_last AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS LAST(v) % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_ctx_absorb_last'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS LAST(v) % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 91 total, 0 merged + NFA Contexts: 2 peak, 51 total, 0 pruned + NFA: 10 matched (len 5/5/5.0), 0 mismatched + NFA: 30 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(10 rows) + +-- No absorption with compound PREV(FIRST()) (match_start-dependent) +CREATE VIEW rpr_ev_ctx_no_absorb_compound AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 AND PREV(FIRST(v), 1) IS NOT NULL +); +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS 
v % 5 = 0 AND PREV(FIRST(v), 1) IS NOT NULL +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+ b + Nav Mark Lookback: 0 + Nav Mark Lookahead: -1 + Storage: Memory Maximum Storage: NkB + NFA States: 9 peak, 151 total, 0 merged + NFA Contexts: 5 peak, 51 total, 0 pruned + NFA: 10 matched (len 4/5/4.9), 1 mismatched (len 5/5/5.0) + NFA: 0 absorbed, 39 skipped (len 1/4/2.5) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(11 rows) + +-- ============================================================ +-- Match Length Statistics Tests +-- ============================================================ +-- Fixed length matches - all same length +CREATE VIEW rpr_ev_mlen_fixed AS +SELECT count(*) OVER w +FROM rpr_nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS cat = 'A', B AS cat = 'B', C AS cat = 'C', + D AS cat = 'D', E AS cat = 'E' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_mlen_fixed'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------ + PATTERN (a b c d e) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM rpr_nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'', + D AS cat = ''D'', E AS cat = ''E'' +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b c d e + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 2 peak, 101 total, 0 merged 
+ NFA Contexts: 3 peak, 101 total, 60 pruned + NFA: 20 matched (len 5/5/5.0), 0 mismatched + NFA: 0 absorbed, 20 skipped (len 1/1/1.0) + -> Seq Scan on rpr_nfa_test (actual rows=100.00 loops=1) +(10 rows) + +-- Variable length matches - min/max/avg differ +CREATE VIEW rpr_ev_mlen_variable AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_mlen_variable'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 191 total, 0 merged + NFA Contexts: 2 peak, 101 total, 0 pruned + NFA: 10 matched (len 10/10/10.0), 0 mismatched + NFA: 80 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(10 rows) + +-- Very long matches +CREATE VIEW rpr_ev_mlen_long AS +SELECT count(*) OVER w +FROM generate_series(1, 200) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v <= 195, B AS v > 195 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_mlen_long'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + 
PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 200) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v <= 195, B AS v > 195 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=200.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 396 total, 0 merged + NFA Contexts: 2 peak, 201 total, 4 pruned + NFA: 1 matched (len 196/196/196.0), 0 mismatched + NFA: 194 absorbed (len 1/1/1.0), 1 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=200.00 loops=1) +(10 rows) + +-- Uniform match length with mismatches from gap rows (v%20 = 11..15) +CREATE VIEW rpr_ev_mlen_with_mismatch AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS (v % 20 <> 0) AND (v % 20 <= 10 OR v % 20 > 15), + B AS v % 20 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_mlen_with_mismatch'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS (v % 20 <> 0) AND (v % 20 <= 10 OR v % 20 > 15), + B AS v % 20 = 0 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT 
ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 171 total, 0 merged + NFA Contexts: 3 peak, 101 total, 25 pruned + NFA: 5 matched (len 5/5/5.0), 5 mismatched (len 11/11/11.0) + NFA: 60 absorbed (len 1/1/1.0), 5 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(10 rows) + +-- ============================================================ +-- Mismatch Length Statistics Tests +-- ============================================================ +-- Pattern with complete match every cycle: 0 mismatched +-- A(1,2,3) B(4,5) C(6) repeats perfectly; X rows are pruned, not mismatched +CREATE VIEW rpr_ev_mlen_no_mismatch AS +SELECT count(*) OVER w +FROM ( + SELECT v, + CASE WHEN v % 10 IN (1,2,3) THEN 'A' + WHEN v % 10 IN (4,5) THEN 'B' + WHEN v % 10 = 6 THEN 'C' + ELSE 'X' END AS cat + FROM generate_series(1, 100) AS s(v) +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+ C) + DEFINE A AS cat = 'A', B AS cat = 'B', C AS cat = 'C' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_mlen_no_mismatch'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------- + PATTERN (a+ b+ c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM ( + SELECT v, + CASE WHEN v % 10 IN (1,2,3) THEN ''A'' + WHEN v % 10 IN (4,5) THEN ''B'' + WHEN v % 10 = 6 THEN ''C'' + ELSE ''X'' END AS cat + FROM generate_series(1, 100) AS s(v) +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+ C) + DEFINE A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'' +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND 
UNBOUNDED FOLLOWING) + Pattern: a+" b+ c + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 151 total, 0 merged + NFA Contexts: 3 peak, 101 total, 60 pruned + NFA: 10 matched (len 6/6/6.0), 0 mismatched + NFA: 20 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(10 rows) + +-- Long partial matches that fail +CREATE VIEW rpr_ev_mlen_long_partial AS +SELECT count(*) OVER w +FROM ( + SELECT i AS v, + CASE + WHEN i <= 20 THEN 'A' + WHEN i <= 25 THEN 'B' + WHEN i = 26 THEN 'X' -- breaks the pattern + WHEN i <= 50 THEN 'A' + WHEN i <= 55 THEN 'B' + WHEN i = 56 THEN 'C' -- completes pattern + ELSE 'Y' + END AS cat + FROM generate_series(1, 60) i +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+ C) + DEFINE A AS cat = 'A', B AS cat = 'B', C AS cat = 'C' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_mlen_long_partial'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------- + PATTERN (a+ b+ c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM ( + SELECT i AS v, + CASE + WHEN i <= 20 THEN ''A'' + WHEN i <= 25 THEN ''B'' + WHEN i = 26 THEN ''X'' -- breaks the pattern + WHEN i <= 50 THEN ''A'' + WHEN i <= 55 THEN ''B'' + WHEN i = 56 THEN ''C'' -- completes pattern + ELSE ''Y'' + END AS cat + FROM generate_series(1, 60) i +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+ C) + DEFINE A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'' +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b+ c + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: 
NkB + NFA States: 3 peak, 115 total, 0 merged + NFA Contexts: 3 peak, 61 total, 15 pruned + NFA: 1 matched (len 30/30/30.0), 1 mismatched (len 26/26/26.0) + NFA: 42 absorbed (len 1/1/1.0), 1 skipped (len 1/1/1.0) + -> Function Scan on generate_series i (actual rows=60.00 loops=1) +(10 rows) + +-- ============================================================ +-- JSON Format Tests +-- ============================================================ +-- JSON format output with all statistics +CREATE VIEW rpr_ev_json_basic AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_json_basic'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------- + PATTERN (a+ b+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT JSON) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2 +)'); + rpr_explain_filter +---------------------------------------------------------------------------- + [ + + { + + "Plan": { + + "Node Type": "WindowAgg", + + "Parallel Aware": false, + + "Async Capable": false, + + "Actual Rows": 50.00, + + "Actual Loops": 1, + + "Disabled": false, + + "Window": "w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)",+ + "Pattern": "a+\" b+", + + "Nav Mark Lookback": 0, + + "Storage": "Memory", + + "Maximum Storage": 0, + + "NFA States Peak": 3, + + "NFA States Total": 85, + + "NFA States Merged": 0, + + "NFA Contexts Peak": 3, + + "NFA Contexts Total": 51, + + "NFA Contexts Absorbed": 0, + + "NFA Contexts Skipped": 17, + + "NFA Contexts Pruned": 16, + + "NFA Matched": 17, + + "NFA Mismatched": 0, + 
+ "NFA Match Length Min": 2, + + "NFA Match Length Max": 2, + + "NFA Match Length Avg": 2.0, + + "NFA Skipped Length Min": 1, + + "NFA Skipped Length Max": 1, + + "NFA Skipped Length Avg": 1.0, + + "Plans": [ + + { + + "Node Type": "Function Scan", + + "Parent Relationship": "Outer", + + "Parallel Aware": false, + + "Async Capable": false, + + "Function Name": "generate_series", + + "Alias": "s", + + "Actual Rows": 50.00, + + "Actual Loops": 1, + + "Disabled": false + + } + + ] + + }, + + "Triggers": [ + + ] + + } + + ] +(1 row) + +-- JSON format with match length statistics +CREATE VIEW rpr_ev_json_matchlen AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_json_matchlen'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT JSON) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +)'); + rpr_explain_filter +---------------------------------------------------------------------------- + [ + + { + + "Plan": { + + "Node Type": "WindowAgg", + + "Parallel Aware": false, + + "Async Capable": false, + + "Actual Rows": 100.00, + + "Actual Loops": 1, + + "Disabled": false, + + "Window": "w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)",+ + "Pattern": "a+\" b", + + "Nav Mark Lookback": 0, + + "Storage": "Memory", + + "Maximum Storage": 0, + + "NFA States Peak": 3, + + "NFA States Total": 191, + + "NFA States Merged": 0, + + "NFA Contexts Peak": 2, + + "NFA Contexts Total": 101, + + "NFA Contexts Absorbed": 80, + + 
"NFA Contexts Skipped": 10, + + "NFA Contexts Pruned": 0, + + "NFA Matched": 10, + + "NFA Mismatched": 0, + + "NFA Match Length Min": 10, + + "NFA Match Length Max": 10, + + "NFA Match Length Avg": 10.0, + + "NFA Absorbed Length Min": 1, + + "NFA Absorbed Length Max": 1, + + "NFA Absorbed Length Avg": 1.0, + + "NFA Skipped Length Min": 1, + + "NFA Skipped Length Max": 1, + + "NFA Skipped Length Avg": 1.0, + + "Plans": [ + + { + + "Node Type": "Function Scan", + + "Parent Relationship": "Outer", + + "Parallel Aware": false, + + "Async Capable": false, + + "Function Name": "generate_series", + + "Alias": "s", + + "Actual Rows": 100.00, + + "Actual Loops": 1, + + "Disabled": false + + } + + ] + + }, + + "Triggers": [ + + ] + + } + + ] +(1 row) + +-- JSON format with mismatch statistics +-- Pattern A B C expects 1,2,3 but gets 1,2,4 twice causing mismatches +CREATE VIEW rpr_ev_json_mismatch AS +SELECT count(*) OVER w +FROM (VALUES (1),(2),(4), (1),(2),(4), (1),(2),(3)) AS t(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v = 1, B AS v = 2, C AS v = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_json_mismatch'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------- + PATTERN (a b c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT JSON) +SELECT count(*) OVER w +FROM (VALUES (1),(2),(4), (1),(2),(4), (1),(2),(3)) AS t(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v = 1, B AS v = 2, C AS v = 3 +)'); + rpr_explain_filter +---------------------------------------------------------------------------- + [ + + { + + "Plan": { + + "Node Type": "WindowAgg", + + "Parallel Aware": false, + + "Async Capable": false, + + "Actual Rows": 9.00, + + "Actual Loops": 1, + + "Disabled": false, + + "Window": "w AS 
(ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)",+ + "Pattern": "a b c", + + "Nav Mark Lookback": 0, + + "Storage": "Memory", + + "Maximum Storage": 0, + + "NFA States Peak": 2, + + "NFA States Total": 10, + + "NFA States Merged": 0, + + "NFA Contexts Peak": 3, + + "NFA Contexts Total": 10, + + "NFA Contexts Absorbed": 0, + + "NFA Contexts Skipped": 1, + + "NFA Contexts Pruned": 5, + + "NFA Matched": 1, + + "NFA Mismatched": 2, + + "NFA Match Length Min": 3, + + "NFA Match Length Max": 3, + + "NFA Match Length Avg": 3.0, + + "NFA Mismatch Length Min": 3, + + "NFA Mismatch Length Max": 3, + + "NFA Mismatch Length Avg": 3.0, + + "NFA Skipped Length Min": 1, + + "NFA Skipped Length Max": 1, + + "NFA Skipped Length Avg": 1.0, + + "Plans": [ + + { + + "Node Type": "Values Scan", + + "Parent Relationship": "Outer", + + "Parallel Aware": false, + + "Async Capable": false, + + "Alias": "*VALUES*", + + "Actual Rows": 9.00, + + "Actual Loops": 1, + + "Disabled": false + + } + + ] + + }, + + "Triggers": [ + + ] + + } + + ] +(1 row) + +-- JSON format with skipped context statistics +-- Alternation pattern with SKIP PAST LAST ROW causes many contexts to be skipped +CREATE VIEW rpr_ev_json_skip AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B)) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_json_skip'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------------------------------------------------------------ + PATTERN ((a | b) (a | b) (a | b) (a | b) (a | b) (a | b) (a | b) (a | b)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT JSON) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND 
UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B)) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +)'); + rpr_explain_filter +---------------------------------------------------------------------------- + [ + + { + + "Plan": { + + "Node Type": "WindowAgg", + + "Parallel Aware": false, + + "Async Capable": false, + + "Actual Rows": 100.00, + + "Actual Loops": 1, + + "Disabled": false, + + "Window": "w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)",+ + "Pattern": "(a | b){8}", + + "Nav Mark Lookback": 0, + + "Storage": "Memory", + + "Maximum Storage": 0, + + "NFA States Peak": 17, + + "NFA States Total": 995, + + "NFA States Merged": 0, + + "NFA Contexts Peak": 8, + + "NFA Contexts Total": 101, + + "NFA Contexts Absorbed": 0, + + "NFA Contexts Skipped": 84, + + "NFA Contexts Pruned": 1, + + "NFA Matched": 12, + + "NFA Mismatched": 3, + + "NFA Match Length Min": 8, + + "NFA Match Length Max": 8, + + "NFA Match Length Avg": 8.0, + + "NFA Mismatch Length Min": 2, + + "NFA Mismatch Length Max": 4, + + "NFA Mismatch Length Avg": 3.0, + + "NFA Skipped Length Min": 1, + + "NFA Skipped Length Max": 7, + + "NFA Skipped Length Avg": 4.0, + + "Plans": [ + + { + + "Node Type": "Function Scan", + + "Parent Relationship": "Outer", + + "Parallel Aware": false, + + "Async Capable": false, + + "Function Name": "generate_series", + + "Alias": "s", + + "Actual Rows": 100.00, + + "Actual Loops": 1, + + "Disabled": false + + } + + ] + + }, + + "Triggers": [ + + ] + + } + + ] +(1 row) + +-- ============================================================ +-- XML Format Tests +-- ============================================================ +-- XML format output +CREATE VIEW rpr_ev_xml_basic AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); 
+SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_xml_basic'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------ + PATTERN (a b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT XML) +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +)'); + rpr_explain_filter +-------------------------------------------------------------------------------- + + + + + + + WindowAgg + + false + + false + + 30.00 + + 1 + + false + + w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)+ + a b + + 0 + + Memory + + 0 + + 2 + + 31 + + 0 + + 2 + + 31 + + 0 + + 15 + + 0 + + 15 + + 0 + + 2 + + 2 + + 2.0 + + 1 + + 1 + + 1.0 + + + + + + Function Scan + + Outer + + false + + false + + generate_series + + s + + 30.00 + + 1 + + false + + + + + + + + + + + + + + +(1 row) + +-- ============================================================ +-- Multiple Partitions Tests +-- ============================================================ +-- Statistics across multiple partitions +CREATE VIEW rpr_ev_part_multi AS +SELECT count(*) OVER w +FROM ( + SELECT p, v + FROM generate_series(1, 3) p, + generate_series(1, 30) v +) t +WINDOW w AS ( + PARTITION BY p + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_part_multi'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM ( + SELECT p, v + FROM generate_series(1, 3) p, + generate_series(1, 30) v +) t +WINDOW w AS ( + PARTITION BY p + ROWS BETWEEN CURRENT ROW AND UNBOUNDED 
FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +------------------------------------------------------------------------------------ + WindowAgg (actual rows=90.00 loops=1) + Window: w AS (PARTITION BY p.p ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 165 total, 0 merged + NFA Contexts: 2 peak, 93 total, 0 pruned + NFA: 18 matched (len 5/5/5.0), 0 mismatched + NFA: 54 absorbed (len 1/1/1.0), 18 skipped (len 1/1/1.0) + -> Sort (actual rows=90.00 loops=1) + Sort Key: p.p + Sort Method: quicksort Memory: NkB + -> Nested Loop (actual rows=90.00 loops=1) + -> Function Scan on generate_series p (actual rows=3.00 loops=1) + -> Function Scan on generate_series v (actual rows=30.00 loops=3) +(15 rows) + +-- Different pattern behavior per partition +CREATE VIEW rpr_ev_part_diff AS +SELECT count(*) OVER w +FROM ( + SELECT + CASE WHEN v <= 25 THEN 1 ELSE 2 END AS p, + v % 10 AS val + FROM generate_series(1, 50) v +) t +WINDOW w AS ( + PARTITION BY p + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS val < 5, B AS val >= 5 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_part_diff'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM ( + SELECT + CASE WHEN v <= 25 THEN 1 ELSE 2 END AS p, + v % 10 AS val + FROM generate_series(1, 50) v +) t +WINDOW w AS ( + PARTITION BY p + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS val < 5, B AS val >= 5 +);'); + rpr_explain_filter 
+-------------------------------------------------------------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (PARTITION BY (CASE WHEN (v.v <= 25) THEN 1 ELSE 2 END) ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 77 total, 0 merged + NFA Contexts: 2 peak, 52 total, 21 pruned + NFA: 5 matched (len 5/6/5.8), 0 mismatched + NFA: 19 absorbed (len 1/1/1.0), 5 skipped (len 1/1/1.0) + -> Sort (actual rows=50.00 loops=1) + Sort Key: (CASE WHEN (v.v <= 25) THEN 1 ELSE 2 END) + Sort Method: quicksort Memory: NkB + -> Function Scan on generate_series v (actual rows=50.00 loops=1) +(13 rows) + +-- ============================================================ +-- Edge Cases +-- ============================================================ +-- Empty result set +CREATE VIEW rpr_ev_edge_empty AS +SELECT count(*) OVER w +FROM generate_series(1, 0) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v = 1, B AS v = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_edge_empty'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------ + PATTERN (a b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 0) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v = 1, B AS v = 2 +);'); + rpr_explain_filter +--------------------------------------------------------------------- + WindowAgg (actual rows=0.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b + Nav Mark Lookback: 0 + -> Function Scan on generate_series s (actual rows=0.00 loops=1) +(5 rows) + +-- Single row +CREATE VIEW 
rpr_ev_edge_single_row AS +SELECT count(*) OVER w +FROM generate_series(1, 1) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A) + DEFINE A AS TRUE +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_edge_single_row'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------- + PATTERN (a) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 1) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A) + DEFINE A AS TRUE +);'); + rpr_explain_filter +--------------------------------------------------------------------- + WindowAgg (actual rows=1.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 2 peak, 2 total, 0 merged + NFA Contexts: 2 peak, 2 total, 0 pruned + NFA: 1 matched (len 1/1/1.0), 0 mismatched + -> Function Scan on generate_series s (actual rows=1.00 loops=1) +(9 rows) + +-- Pattern longer than data +CREATE VIEW rpr_ev_edge_pattern_longer AS +SELECT count(*) OVER w +FROM generate_series(1, 5) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E F G H I J) + DEFINE + A AS v = 1, B AS v = 2, C AS v = 3, D AS v = 4, E AS v = 5, + F AS v = 6, G AS v = 7, H AS v = 8, I AS v = 9, J AS v = 10 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_edge_pattern_longer'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------------------- + PATTERN (a b c d e f g h i j) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 5) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED 
FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E F G H I J) + DEFINE + A AS v = 1, B AS v = 2, C AS v = 3, D AS v = 4, E AS v = 5, + F AS v = 6, G AS v = 7, H AS v = 8, I AS v = 9, J AS v = 10 +);'); + rpr_explain_filter +--------------------------------------------------------------------- + WindowAgg (actual rows=5.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b c d e f g h i j + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 2 peak, 6 total, 0 merged + NFA Contexts: 3 peak, 6 total, 4 pruned + NFA: 0 matched, 1 mismatched (len 5/5/5.0) + -> Function Scan on generate_series s (actual rows=5.00 loops=1) +(9 rows) + +-- All rows match as single match +CREATE VIEW rpr_ev_edge_single_match AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS TRUE +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_edge_single_match'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------- + PATTERN (a+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS TRUE +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 101 total, 0 merged + NFA Contexts: 2 peak, 51 total, 0 pruned + NFA: 1 matched (len 50/50/50.0), 0 mismatched + NFA: 49 absorbed (len 1/1/1.0), 0 skipped + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(10 rows) + +-- 
============================================================ +-- Complex Pattern Tests +-- ============================================================ +-- Nested groups +CREATE VIEW rpr_ev_cpx_nested AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A B) C)+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_cpx_nested'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------- + PATTERN (((a b) c)+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A B) C)+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b' c')+" + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 81 total, 0 merged + NFA Contexts: 4 peak, 61 total, 20 pruned + NFA: 1 matched (len 60/60/60.0), 0 mismatched + NFA: 19 absorbed (len 3/3/3.0), 20 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(10 rows) + +-- Multiple alternations +CREATE VIEW rpr_ev_cpx_multi_alt AS +SELECT count(*) OVER w +FROM rpr_nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (C | D | E)) + DEFINE + A AS cat = 'A', B AS cat = 'B', C AS cat = 'C', + D AS cat = 'D', E AS cat = 'E' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_cpx_multi_alt'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line 
+---------------------------------- + PATTERN ((a | b) (c | d | e)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM rpr_nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (C | D | E)) + DEFINE + A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'', + D AS cat = ''D'', E AS cat = ''E'' +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b) (c | d | e) + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 6 peak, 423 total, 0 merged + NFA Contexts: 3 peak, 101 total, 40 pruned + NFA: 20 matched (len 2/2/2.0), 20 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 20 skipped (len 1/1/1.0) + -> Seq Scan on rpr_nfa_test (actual rows=100.00 loops=1) +(10 rows) + +-- Optional elements +CREATE VIEW rpr_ev_cpx_optional AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B? C) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_cpx_optional'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN (a b? c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B? 
C) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b? c + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 64 total, 0 merged + NFA Contexts: 3 peak, 51 total, 25 pruned + NFA: 12 matched (len 3/3/3.0), 1 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 12 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(10 rows) + +-- Bounded quantifiers +CREATE VIEW rpr_ev_cpx_bounded AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,5} B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_cpx_bounded'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------- + PATTERN (a{2,5} b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,5} B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{2,5} b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 9 peak, 311 total, 0 merged + NFA Contexts: 7 peak, 101 total, 0 pruned + NFA: 10 matched (len 6/6/6.0), 40 mismatched (len 6/6/6.0) + NFA: 0 absorbed, 50 skipped (len 1/5/3.0) + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(10 rows) + +-- Star quantifier +CREATE 
VIEW rpr_ev_cpx_star AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B* C) + DEFINE A AS v % 10 = 1, B AS v % 10 IN (2,3,4,5,6,7,8), C AS v % 10 = 9 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_cpx_star'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN (a b* c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B* C) + DEFINE A AS v % 10 = 1, B AS v % 10 IN (2,3,4,5,6,7,8), C AS v % 10 = 9 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b* c + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 91 total, 0 merged + NFA Contexts: 3 peak, 51 total, 40 pruned + NFA: 5 matched (len 9/9/9.0), 0 mismatched + NFA: 0 absorbed, 5 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(10 rows) + +-- ============================================================ +-- Real-world Pattern Examples +-- ============================================================ +-- Stock price pattern - V-shape (down then up) +CREATE VIEW rpr_ev_real_vshape AS +SELECT count(*) OVER w +FROM rpr_nfa_complex +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (D+ U+) + DEFINE D AS trend = 'D', U AS trend = 'U' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_real_vshape'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------- + PATTERN (d+ u+) +(1 row) + +SELECT 
rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM rpr_nfa_complex +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (D+ U+) + DEFINE D AS trend = ''D'', U AS trend = ''U'' +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: d+" u+ + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 58 total, 0 merged + NFA Contexts: 3 peak, 31 total, 3 pruned + NFA: 3 matched (len 3/14/8.0), 1 mismatched (len 3/3/3.0) + NFA: 9 absorbed (len 1/1/1.0), 14 skipped (len 1/1/1.0) + -> Seq Scan on rpr_nfa_complex (actual rows=30.00 loops=1) +(10 rows) + +-- Stock price pattern - peak (up, stable, down) +CREATE VIEW rpr_ev_real_peak AS +SELECT count(*) OVER w +FROM rpr_nfa_complex +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (U+ S* D+) + DEFINE U AS trend = 'U', S AS trend = 'S', D AS trend = 'D' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_real_peak'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------- + PATTERN (u+ s* d+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM rpr_nfa_complex +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (U+ S* D+) + DEFINE U AS trend = ''U'', S AS trend = ''S'', D AS trend = ''D'' +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: u+" s* d+ + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 76 
total, 0 merged + NFA Contexts: 3 peak, 31 total, 1 pruned + NFA: 4 matched (len 3/11/7.2), 0 mismatched + NFA: 12 absorbed (len 1/1/1.0), 13 skipped (len 1/1/1.0) + -> Seq Scan on rpr_nfa_complex (actual rows=30.00 loops=1) +(10 rows) + +-- Consecutive increasing values (using PREV) +CREATE VIEW rpr_ev_real_increasing AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3,}) + DEFINE A AS v > PREV(v) OR PREV(v) IS NULL +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_real_increasing'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------- + PATTERN (a{3,}) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3,}) + DEFINE A AS v > PREV(v) OR PREV(v) IS NULL +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{3,}" + Nav Mark Lookback: 1 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 99 total, 0 merged + NFA Contexts: 2 peak, 51 total, 0 pruned + NFA: 1 matched (len 50/50/50.0), 0 mismatched + NFA: 49 absorbed (len 1/1/1.0), 0 skipped + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(10 rows) + +-- ============================================================ +-- Performance-oriented Tests +-- ============================================================ +-- Large dataset with simple pattern +CREATE VIEW rpr_ev_perf_large_simple AS +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + 
PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_perf_large_simple'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------ + PATTERN (a b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +);'); + rpr_explain_filter +------------------------------------------------------------------------ + WindowAgg (actual rows=1000.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 2 peak, 1001 total, 0 merged + NFA Contexts: 2 peak, 1001 total, 0 pruned + NFA: 500 matched (len 2/2/2.0), 0 mismatched + NFA: 0 absorbed, 500 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=1000.00 loops=1) +(10 rows) + +-- Large dataset with absorption +CREATE VIEW rpr_ev_perf_large_absorb AS +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 100 <> 0, B AS v % 100 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_perf_large_absorb'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 100 <> 0, B AS v % 100 = 0 +);'); + rpr_explain_filter 
+------------------------------------------------------------------------ + WindowAgg (actual rows=1000.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 1991 total, 0 merged + NFA Contexts: 2 peak, 1001 total, 0 pruned + NFA: 10 matched (len 100/100/100.0), 0 mismatched + NFA: 980 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=1000.00 loops=1) +(10 rows) + +-- High state merge ratio +CREATE VIEW rpr_ev_perf_high_merge AS +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_perf_high_merge'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------- + PATTERN ((a | b)+ c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=500.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b)+ c + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 9 peak, 3006 total, 0 merged + NFA Contexts: 3 peak, 501 total, 1 pruned + NFA: 166 matched (len 3/3/3.0), 1 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 332 skipped (len 1/2/1.5) + -> Function Scan on generate_series s (actual rows=500.00 loops=1) +(10 rows) + +-- 
============================================================ +-- INITIAL vs no INITIAL comparison +-- ============================================================ +-- With INITIAL keyword +CREATE VIEW rpr_ev_initial_with AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_initial_with'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 91 total, 0 merged + NFA Contexts: 2 peak, 51 total, 0 pruned + NFA: 10 matched (len 5/5/5.0), 0 mismatched + NFA: 30 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(10 rows) + +-- Without INITIAL keyword (same behavior currently) +CREATE VIEW rpr_ev_initial_without AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_initial_without'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 
row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 91 total, 0 merged + NFA Contexts: 2 peak, 51 total, 0 pruned + NFA: 10 matched (len 5/5/5.0), 0 mismatched + NFA: 30 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(10 rows) + +-- ============================================================ +-- Quantifier Variations +-- ============================================================ +-- Plus quantifier +CREATE VIEW rpr_ev_quant_plus AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS v % 4 <> 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_quant_plus'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------- + PATTERN (a+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS v % 4 <> 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: 
0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 71 total, 0 merged + NFA Contexts: 3 peak, 41 total, 10 pruned + NFA: 10 matched (len 3/3/3.0), 0 mismatched + NFA: 20 absorbed (len 1/1/1.0), 0 skipped + -> Function Scan on generate_series s (actual rows=40.00 loops=1) +(10 rows) + +-- Star quantifier (zero or more) +CREATE VIEW rpr_ev_quant_star AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A* B) + DEFINE A AS v % 4 IN (1, 2), B AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_quant_star'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a* b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A* B) + DEFINE A AS v % 4 IN (1, 2), B AS v % 4 = 3 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a*" b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 102 total, 0 merged + NFA Contexts: 2 peak, 41 total, 10 pruned + NFA: 10 matched (len 3/3/3.0), 0 mismatched + NFA: 10 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=40.00 loops=1) +(10 rows) + +-- Question mark (zero or one) +CREATE VIEW rpr_ev_quant_question AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A? 
B C) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_quant_question'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN (a? b c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A? B C) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a? b c + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 82 total, 0 merged + NFA Contexts: 3 peak, 41 total, 10 pruned + NFA: 10 matched (len 3/3/3.0), 0 mismatched + NFA: 0 absorbed, 20 skipped (len 1/2/1.5) + -> Function Scan on generate_series s (actual rows=40.00 loops=1) +(10 rows) + +-- Exact count {n} +CREATE VIEW rpr_ev_quant_exact AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_quant_exact'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN (a{3} b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + 
WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{3} b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 51 total, 0 merged + NFA Contexts: 5 peak, 51 total, 0 pruned + NFA: 10 matched (len 4/4/4.0), 10 mismatched (len 4/4/4.0) + NFA: 0 absorbed, 30 skipped (len 1/3/2.0) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(10 rows) + +-- Range {n,m} +CREATE VIEW rpr_ev_quant_range AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,4} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_quant_range'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------- + PATTERN (a{2,4} b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,4} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{2,4} b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 7 peak, 101 total, 0 merged + NFA Contexts: 5 peak, 51 total, 0 pruned + NFA: 10 matched (len 5/5/5.0), 0 mismatched + NFA: 0 absorbed, 40 skipped (len 1/4/2.5) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(10 rows) + +-- At least {n,} +CREATE VIEW rpr_ev_quant_atleast AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + 
PATTERN (A{3,} B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_quant_atleast'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------- + PATTERN (a{3,} b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3,} B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{3,}" b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 86 total, 0 merged + NFA Contexts: 2 peak, 51 total, 0 pruned + NFA: 5 matched (len 10/10/10.0), 0 mismatched + NFA: 40 absorbed (len 1/1/1.0), 5 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(10 rows) + +-- ============================================================ +-- Regression Tests for Statistics Accuracy +-- ============================================================ +-- Verify state count accuracy +-- Pattern A+ B with 20 rows should show predictable state behavior +CREATE VIEW rpr_ev_reg_state_count AS +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_reg_state_count'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 20) AS 
s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=20.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 37 total, 0 merged + NFA Contexts: 2 peak, 21 total, 0 pruned + NFA: 4 matched (len 5/5/5.0), 0 mismatched + NFA: 12 absorbed (len 1/1/1.0), 4 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=20.00 loops=1) +(10 rows) + +-- Verify context count with known absorption +CREATE VIEW rpr_ev_reg_ctx_absorb AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B C) + DEFINE A AS v % 10 IN (1,2,3,4,5,6,7), B AS v % 10 = 8, C AS v % 10 = 9 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_reg_ctx_absorb'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN (a+ b c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B C) + DEFINE A AS v % 10 IN (1,2,3,4,5,6,7), B AS v % 10 = 8, C AS v % 10 = 9 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b c + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 52 total, 0 merged + NFA Contexts: 3 peak, 31 total, 6 pruned + NFA: 3 matched (len 9/9/9.0), 0 
mismatched + NFA: 18 absorbed (len 1/1/1.0), 3 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=30.00 loops=1) +(10 rows) + +-- Verify match length with fixed-length pattern +CREATE VIEW rpr_ev_reg_matchlen AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_reg_matchlen'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------- + PATTERN (a b c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b c + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 2 peak, 31 total, 0 merged + NFA Contexts: 3 peak, 31 total, 10 pruned + NFA: 10 matched (len 3/3/3.0), 0 mismatched + NFA: 0 absorbed, 10 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=30.00 loops=1) +(10 rows) + +-- ============================================================ +-- Alternation Pattern Tests +-- ============================================================ +-- Simple alternation +CREATE VIEW rpr_ev_alt_simple AS +SELECT count(*) OVER w +FROM rpr_nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) C) + DEFINE A AS cat = 'A', B AS cat = 'B', C AS cat = 'C' +); +SELECT line FROM 
unnest(string_to_array(pg_get_viewdef('rpr_ev_alt_simple'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------ + PATTERN ((a | b) c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM rpr_nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) C) + DEFINE A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'' +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b) c + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 303 total, 0 merged + NFA Contexts: 3 peak, 101 total, 40 pruned + NFA: 20 matched (len 2/2/2.0), 20 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 20 skipped (len 1/1/1.0) + -> Seq Scan on rpr_nfa_test (actual rows=100.00 loops=1) +(10 rows) + +-- Multiple items in alternation +CREATE VIEW rpr_ev_alt_multi_item AS +SELECT count(*) OVER w +FROM rpr_nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C | D) E) + DEFINE + A AS cat = 'A', B AS cat = 'B', C AS cat = 'C', + D AS cat = 'D', E AS cat = 'E' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_alt_multi_item'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------------- + PATTERN ((a | b | c | d) e) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM rpr_nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C | D) E) + DEFINE + A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'', + D AS cat = ''D'', E AS cat = ''E'' +);'); + rpr_explain_filter 
+------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b | c | d) e + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 6 peak, 505 total, 0 merged + NFA Contexts: 3 peak, 101 total, 0 pruned + NFA: 20 matched (len 2/2/2.0), 60 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 20 skipped (len 1/1/1.0) + -> Seq Scan on rpr_nfa_test (actual rows=100.00 loops=1) +(10 rows) + +-- Alternation with quantifiers +CREATE VIEW rpr_ev_alt_with_quant AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_alt_with_quant'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------- + PATTERN ((a | b)+ c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b)+ c + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 9 peak, 306 total, 0 merged + NFA Contexts: 3 peak, 51 total, 1 pruned + NFA: 16 matched (len 3/3/3.0), 1 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 32 skipped (len 1/2/1.5) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(10 rows) + +-- Multiple alternatives (4+) +CREATE VIEW rpr_ev_alt_four_plus AS 
+SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A | B | C | D | E) + DEFINE A AS v % 5 = 0, B AS v % 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_alt_four_plus'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------------- + PATTERN (a | b | c | d | e) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A | B | C | D | E) + DEFINE A AS v % 5 = 0, B AS v % 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b | c | d | e) + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 7 peak, 606 total, 0 merged + NFA Contexts: 2 peak, 101 total, 0 pruned + NFA: 100 matched (len 1/1/1.0), 0 mismatched + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(9 rows) + +-- Alternation at start +CREATE VIEW rpr_ev_alt_at_start AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B) C D) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_alt_at_start'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------- + PATTERN ((a | b) c d) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND 
UNBOUNDED FOLLOWING + PATTERN ((A | B) C D) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b) c d + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 183 total, 0 merged + NFA Contexts: 3 peak, 61 total, 16 pruned + NFA: 15 matched (len 3/3/3.0), 14 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 15 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(10 rows) + +-- Multiple sequential alternations +CREATE VIEW rpr_ev_alt_sequential AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B) C (D | E) F) + DEFINE A AS v % 6 = 0, B AS v % 6 = 1, C AS v % 6 = 2, D AS v % 6 = 3, E AS v % 6 = 4, F AS v % 6 = 5 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_alt_sequential'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------------------- + PATTERN ((a | b) c (d | e) f) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B) C (D | E) F) + DEFINE A AS v % 6 = 0, B AS v % 6 = 1, C AS v % 6 = 2, D AS v % 6 = 3, E AS v % 6 = 4, F AS v % 6 = 5 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b) c (d | e) f + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 337 total, 0 merged + NFA Contexts: 3 peak, 101 total, 67 pruned + NFA: 0 matched, 33 
mismatched (len 2/4/3.0) + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(9 rows) + +-- Quantified alternatives +CREATE VIEW rpr_ev_alt_quantified AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A+ | B+) C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_alt_quantified'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------- + PATTERN ((a+ | b+) c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A+ | B+) C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a+" | b+") c + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 223 total, 0 merged + NFA Contexts: 3 peak, 61 total, 1 pruned + NFA: 20 matched (len 2/2/2.0), 19 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 20 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(10 rows) + +-- Alternation at end +CREATE VIEW rpr_ev_alt_at_end AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (C | D)) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_alt_at_end'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------- + PATTERN (a b (c | d)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, 
SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (C | D)) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b (c | d) + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 89 total, 0 merged + NFA Contexts: 3 peak, 61 total, 32 pruned + NFA: 14 matched (len 3/3/3.0), 0 mismatched + NFA: 0 absorbed, 14 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(10 rows) + +-- Nested ALT at start of branch inside outer ALT +-- Pattern: (A ((B | C) D | E)) - preceding VAR + inner ALT as first branch element +CREATE VIEW rpr_ev_alt_nested_start AS +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B | C) D | E)) + DEFINE A AS v % 5 = 0, B AS v % 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_alt_nested_start'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------------- + PATTERN (a ((b | c) d | e)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B | C) D | E)) + DEFINE A AS v % 5 = 0, B AS v % 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=20.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a ((b | c) d | e) + Nav 
Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 37 total, 0 merged + NFA Contexts: 3 peak, 21 total, 17 pruned + NFA: 0 matched, 3 mismatched (len 3/3/3.0) + -> Function Scan on generate_series s (actual rows=20.00 loops=1) +(9 rows) + +-- Nested ALT at end of branch inside outer ALT +-- Pattern: (C (A | B) | D) - inner ALT is last element in outer branch +CREATE VIEW rpr_ev_alt_nested_end AS +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (C (A | B) | D) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_alt_nested_end'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------------- + PATTERN (c (a | b) | d) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (C (A | B) | D) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=20.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (c (a | b) | d) + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 73 total, 0 merged + NFA Contexts: 3 peak, 21 total, 10 pruned + NFA: 5 matched (len 1/1/1.0), 5 mismatched (len 2/2/2.0) + -> Function Scan on generate_series s (actual rows=20.00 loops=1) +(9 rows) + +-- ============================================================ +-- Group Pattern Tests +-- ============================================================ +-- Simple group +CREATE VIEW rpr_ev_grp_simple AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW 
AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_grp_simple'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN ((a b)+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b')+" + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 61 total, 0 merged + NFA Contexts: 3 peak, 41 total, 0 pruned + NFA: 1 matched (len 40/40/40.0), 0 mismatched + NFA: 19 absorbed (len 2/2/2.0), 20 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=40.00 loops=1) +(10 rows) + +-- Group with bounded quantifier +CREATE VIEW rpr_ev_grp_bounded AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B){2,4}) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_grp_bounded'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------- + PATTERN ((a b){2,4}) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B){2,4}) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +);'); + 
rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b){2,4} + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 51 total, 0 merged + NFA Contexts: 3 peak, 41 total, 5 pruned + NFA: 5 matched (len 8/8/8.0), 0 mismatched + NFA: 0 absorbed, 30 skipped (len 1/2/1.5) + -> Function Scan on generate_series s (actual rows=40.00 loops=1) +(10 rows) + +-- Nested groups +CREATE VIEW rpr_ev_grp_nested AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A B){2})+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_grp_nested'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------- + PATTERN (((a b){2})+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A B){2})+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: ((a' b'){2}')+" + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 76 total, 0 merged + NFA Contexts: 4 peak, 61 total, 15 pruned + NFA: 1 matched (len 60/60/60.0), 0 mismatched + NFA: 14 absorbed (len 4/4/4.0), 30 skipped (len 1/2/1.5) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(10 rows) + +-- Deep nesting (3+ levels) +CREATE VIEW rpr_ev_grp_deep AS +SELECT count(*) OVER w +FROM 
generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((((A | B)+)+)+) + DEFINE A AS v % 2 = 0, B AS v % 2 = 1 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_grp_deep'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------------- + PATTERN ((((a | b)+)+)+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((((A | B)+)+)+) + DEFINE A AS v % 2 = 0, B AS v % 2 = 1 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b)+ + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 6 peak, 243 total, 0 merged + NFA Contexts: 2 peak, 41 total, 0 pruned + NFA: 1 matched (len 40/40/40.0), 0 mismatched + NFA: 0 absorbed, 39 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=40.00 loops=1) +(10 rows) + +-- Bounded quantifier on alternation +CREATE VIEW rpr_ev_grp_bounded_alt AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B){2,3} C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_grp_bounded_alt'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------------- + PATTERN ((a | b){2,3} c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B){2,3} C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 
+);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b){2,3} c + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 8 peak, 320 total, 0 merged + NFA Contexts: 3 peak, 61 total, 2 pruned + NFA: 19 matched (len 3/3/3.0), 1 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 38 skipped (len 1/2/1.5) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(10 rows) + +-- Nested groups with quantifiers +CREATE VIEW rpr_ev_grp_nested_quant AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B)+ C)*) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_grp_nested_quant'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------- + PATTERN (((a b)+ c)*) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B)+ C)*) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: ((a' b')+" c)* + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 9 peak, 178 total, 0 merged + NFA Contexts: 4 peak, 61 total, 20 pruned + NFA: 3 matched (len 0/57/19.0), 0 mismatched + NFA: 0 absorbed, 37 skipped (len 1/3/2.0) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(10 rows) + +-- Partial nested quantification +CREATE VIEW rpr_ev_grp_partial_quant AS +SELECT count(*) OVER w 
+FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A (B C)+)*) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_grp_partial_quant'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------- + PATTERN ((a (b c)+)*) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A (B C)+)*) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a (b c)+)* + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 7 peak, 160 total, 0 merged + NFA Contexts: 4 peak, 61 total, 20 pruned + NFA: 3 matched (len 0/57/19.0), 0 mismatched + NFA: 0 absorbed, 37 skipped (len 1/3/2.0) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(10 rows) + +-- ============================================================ +-- Window Function Combinations +-- ============================================================ +-- count(*) with pattern +CREATE VIEW rpr_ev_wfn_count AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_wfn_count'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 
30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 55 total, 0 merged + NFA Contexts: 2 peak, 31 total, 0 pruned + NFA: 6 matched (len 5/5/5.0), 0 mismatched + NFA: 18 absorbed (len 1/1/1.0), 6 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=30.00 loops=1) +(10 rows) + +-- first_value with pattern +CREATE VIEW rpr_ev_wfn_first_value AS +SELECT first_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_wfn_first_value'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT first_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 55 total, 0 merged + NFA Contexts: 2 peak, 31 total, 0 pruned + NFA: 6 matched (len 5/5/5.0), 0 mismatched + NFA: 18 absorbed (len 1/1/1.0), 6 skipped (len 1/1/1.0) + -> 
Function Scan on generate_series s (actual rows=30.00 loops=1) +(10 rows) + +-- last_value with pattern +CREATE VIEW rpr_ev_wfn_last_value AS +SELECT last_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_wfn_last_value'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT last_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 55 total, 0 merged + NFA Contexts: 2 peak, 31 total, 0 pruned + NFA: 6 matched (len 5/5/5.0), 0 mismatched + NFA: 18 absorbed (len 1/1/1.0), 6 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=30.00 loops=1) +(10 rows) + +-- Multiple window functions +CREATE VIEW rpr_ev_wfn_multi AS +SELECT + count(*) OVER w, + first_value(v) OVER w, + last_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_wfn_multi'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS 
OFF, TIMING OFF, SUMMARY OFF) +SELECT + count(*) OVER w, + first_value(v) OVER w, + last_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 55 total, 0 merged + NFA Contexts: 2 peak, 31 total, 0 pruned + NFA: 6 matched (len 5/5/5.0), 0 mismatched + NFA: 18 absorbed (len 1/1/1.0), 6 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=30.00 loops=1) +(10 rows) + +-- ============================================================ +-- DEFINE Expression Variations +-- ============================================================ +-- Complex boolean expressions +CREATE VIEW rpr_ev_def_complex_bool AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS (v % 5 <> 0) AND (v % 3 <> 0), + B AS (v % 5 = 0) OR (v % 3 = 0) +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_def_complex_bool'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS (v % 5 <> 0) AND (v % 3 <> 0), + B AS (v % 5 = 0) OR (v % 3 = 0) +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg 
(actual rows=50.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 78 total, 0 merged + NFA Contexts: 2 peak, 51 total, 6 pruned + NFA: 17 matched (len 2/3/2.6), 0 mismatched + NFA: 10 absorbed (len 1/1/1.0), 17 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=50.00 loops=1) +(10 rows) + +-- Using PREV function +CREATE VIEW rpr_ev_def_prev AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (S U+ D+) + DEFINE + S AS TRUE, + U AS v > PREV(v), + D AS v < PREV(v) +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_def_prev'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------- + PATTERN (s u+ d+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (S U+ D+) + DEFINE + S AS TRUE, + U AS v > PREV(v), + D AS v < PREV(v) +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: s u+ d+ + Nav Mark Lookback: 1 + Storage: Memory Maximum Storage: NkB + NFA States: 60 peak, 466 total, 0 merged + NFA Contexts: 31 peak, 31 total, 1 pruned + NFA: 0 matched, 29 mismatched (len 2/30/16.0) + -> Function Scan on generate_series s (actual rows=30.00 loops=1) +(9 rows) + +-- Using 1-arg PREV (implicit offset 1) +CREATE VIEW rpr_ev_nav_prev1 AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + 
PATTERN (A B+) + DEFINE + A AS TRUE, + B AS v > PREV(v) +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_nav_prev1'), E'\n')) AS line WHERE line ~ 'PATTERN|DEFINE|PREV|NEXT'; + line +------------------------- + PATTERN (a b+) + DEFINE + b AS (v > PREV(v)) ); +(3 rows) + +-- Using 1-arg NEXT (implicit offset 1) +CREATE VIEW rpr_ev_nav_next1 AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE + A AS TRUE, + B AS v < NEXT(v) +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_nav_next1'), E'\n')) AS line WHERE line ~ 'PATTERN|DEFINE|PREV|NEXT'; + line +------------------------- + PATTERN (a b+) + DEFINE + b AS (v < NEXT(v)) ); +(3 rows) + +-- Using 2-arg PREV (explicit offset) +CREATE VIEW rpr_ev_nav_prev2 AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE + A AS TRUE, + B AS v > PREV(v, 2) +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_nav_prev2'), E'\n')) AS line WHERE line ~ 'PATTERN|DEFINE|PREV|NEXT'; + line +-------------------------------------- + PATTERN (a b+) + DEFINE + b AS (v > PREV(v, (2)::bigint)) ); +(3 rows) + +-- Using 2-arg NEXT (explicit offset) +CREATE VIEW rpr_ev_nav_next2 AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE + A AS TRUE, + B AS v < NEXT(v, 2) +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_nav_next2'), E'\n')) AS line WHERE line ~ 'PATTERN|DEFINE|PREV|NEXT'; + line +-------------------------------------- + PATTERN (a b+) + DEFINE + b AS (v < NEXT(v, (2)::bigint)) ); +(3 rows) + +-- Using NULL comparisons +CREATE VIEW rpr_ev_def_null AS +SELECT count(*) 
OVER w +FROM ( + SELECT CASE WHEN v % 5 = 0 THEN NULL ELSE v END AS v + FROM generate_series(1, 30) v +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v IS NOT NULL, B AS v IS NULL +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_def_null'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM ( + SELECT CASE WHEN v % 5 = 0 THEN NULL ELSE v END AS v + FROM generate_series(1, 30) v +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v IS NOT NULL, B AS v IS NULL +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 55 total, 0 merged + NFA Contexts: 2 peak, 31 total, 0 pruned + NFA: 6 matched (len 5/5/5.0), 0 mismatched + NFA: 18 absorbed (len 1/1/1.0), 6 skipped (len 1/1/1.0) + -> Function Scan on generate_series v (actual rows=30.00 loops=1) +(10 rows) + +-- ============================================================ +-- Large Scale Statistics Verification +-- ============================================================ +-- 500 rows - verify statistics scale correctly +CREATE VIEW rpr_ev_scale_500rows AS +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B C) + DEFINE A AS v % 10 < 7, B AS v % 10 = 7, C AS v % 10 = 8 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_scale_500rows'), E'\n')) AS line WHERE line ~ 'PATTERN'; + 
line +--------------------- + PATTERN (a+ b c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B C) + DEFINE A AS v % 10 < 7, B AS v % 10 = 7, C AS v % 10 = 8 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=500.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" b c + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 851 total, 0 merged + NFA Contexts: 3 peak, 501 total, 101 pruned + NFA: 50 matched (len 8/9/9.0), 0 mismatched + NFA: 299 absorbed (len 1/1/1.0), 50 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=500.00 loops=1) +(10 rows) + +-- High match count scenario +CREATE VIEW rpr_ev_scale_high_match AS +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_scale_high_match'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------ + PATTERN (a b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=500.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b + Nav Mark Lookback: 0 + Storage: 
Memory Maximum Storage: NkB + NFA States: 2 peak, 501 total, 0 merged + NFA Contexts: 2 peak, 501 total, 0 pruned + NFA: 250 matched (len 2/2/2.0), 0 mismatched + NFA: 0 absorbed, 250 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=500.00 loops=1) +(10 rows) + +-- High skip count scenario +CREATE VIEW rpr_ev_scale_high_skip AS +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS v % 100 = 1, + B AS v % 100 = 2, + C AS v % 100 = 3, + D AS v % 100 = 4, + E AS v % 100 = 5 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_scale_high_skip'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------ + PATTERN (a b c d e) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS v % 100 = 1, + B AS v % 100 = 2, + C AS v % 100 = 3, + D AS v % 100 = 4, + E AS v % 100 = 5 +);'); + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=500.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b c d e + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 2 peak, 501 total, 0 merged + NFA Contexts: 3 peak, 501 total, 490 pruned + NFA: 5 matched (len 5/5/5.0), 0 mismatched + NFA: 0 absorbed, 5 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=500.00 loops=1) +(10 rows) + +-- +-- Planner optimization: optimize_window_clauses must not alter RPR frame +-- +-- optimize_window_clauses() replaces frame options via prosupport functions. 
+-- Affected functions: row_number, rank, dense_rank, percent_rank, cume_dist, +-- ntile. All would change the frame to ROWS UNBOUNDED PRECEDING, breaking +-- RPR's required ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING. +-- Test with row_number() as representative case. +-- +-- Without RPR: row_number() frame is optimized to ROWS UNBOUNDED PRECEDING +CREATE VIEW rpr_ev_opt_no_rpr AS +SELECT row_number() OVER w +FROM generate_series(1, 10) AS s(v) +WINDOW w AS ( + ORDER BY v + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING +); +EXPLAIN (COSTS OFF) SELECT * FROM rpr_ev_opt_no_rpr; + QUERY PLAN +-------------------------------------------------------------- + Subquery Scan on rpr_ev_opt_no_rpr + -> WindowAgg + Window: w AS (ORDER BY s.v ROWS UNBOUNDED PRECEDING) + -> Sort + Sort Key: s.v + -> Function Scan on generate_series s +(6 rows) + +-- With RPR: frame must remain ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING +CREATE VIEW rpr_ev_opt_with_rpr AS +SELECT row_number() OVER w +FROM generate_series(1, 10) AS s(v) +WINDOW w AS ( + ORDER BY v + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE + B AS v > PREV(v) +); +EXPLAIN (COSTS OFF) SELECT * FROM rpr_ev_opt_with_rpr; + QUERY PLAN +-------------------------------------------------------------------------------------- + Subquery Scan on rpr_ev_opt_with_rpr + -> WindowAgg + Window: w AS (ORDER BY s.v ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> Sort + Sort Key: s.v + -> Function Scan on generate_series s +(8 rows) + +-- +-- Planner optimization: non-RPR and RPR windows that share the same base frame +-- after frame optimization are kept as separate WindowAgg nodes. 
+-- +CREATE VIEW rpr_ev_opt_mixed AS +SELECT + row_number() OVER w_normal AS rn_normal, + row_number() OVER w_rpr AS rn_rpr +FROM generate_series(1, 5) AS s(v) +WINDOW + w_normal AS (ORDER BY v RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + w_rpr AS ( + ORDER BY v + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS v > 1 + ); +EXPLAIN (COSTS OFF) SELECT * FROM rpr_ev_opt_mixed; + QUERY PLAN +------------------------------------------------------------------------------------------ + Subquery Scan on rpr_ev_opt_mixed + -> WindowAgg + Window: w_rpr AS (ORDER BY s.v ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: 0 + -> WindowAgg + Window: w_normal AS (ORDER BY s.v ROWS UNBOUNDED PRECEDING) + -> Sort + Sort Key: s.v + -> Function Scan on generate_series s +(10 rows) + +-- +-- Planner optimization: find_window_run_conditions must not push down +-- RPR window function results as Run Conditions. +-- +-- find_window_run_conditions() pushes WHERE filters on monotonic window +-- functions into WindowAgg as Run Conditions for early termination. +-- With RPR's required frame (ROWS BETWEEN CURRENT ROW AND UNBOUNDED +-- FOLLOWING), the monotonic direction determines which operators trigger +-- Run Condition pushdown: +-- INCREASING (<=): row_number, rank, dense_rank, percent_rank, +-- cume_dist, ntile +-- DECREASING (>): count(*) (via int8inc, END_UNBOUNDED_FOLLOWING) +-- RPR window function results are match-dependent, not monotonic. +-- Test with count(*) > 0 as representative case. 
+-- +-- Without RPR: count(*) > 0 is pushed down as Run Condition +EXPLAIN (COSTS OFF) +SELECT * FROM ( + SELECT count(*) OVER w AS cnt + FROM generate_series(1, 10) AS s(v) + WINDOW w AS ( + ORDER BY v + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + ) +) t WHERE cnt > 0; + QUERY PLAN +-------------------------------------------------------------------------------------- + Subquery Scan on t + -> WindowAgg + Window: w AS (ORDER BY s.v ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Run Condition: (count(*) OVER w > 0) + -> Sort + Sort Key: s.v + -> Function Scan on generate_series s +(7 rows) + +-- With RPR: count(*) > 0 must not be pushed down as Run Condition +EXPLAIN (COSTS OFF) +SELECT * FROM ( + SELECT count(*) OVER w AS cnt + FROM generate_series(1, 10) AS s(v) + WINDOW w AS ( + ORDER BY v + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B+) + DEFINE + B AS v > PREV(v) + ) +) t WHERE cnt > 0; + QUERY PLAN +-------------------------------------------------------------------------------------- + Subquery Scan on t + Filter: (t.cnt > 0) + -> WindowAgg + Window: w AS (ORDER BY s.v ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> Sort + Sort Key: s.v + -> Function Scan on generate_series s +(9 rows) + +-- ============================================================ +-- Nav Mark Lookback/Lookahead Tests +-- Verifies planner-computed navigation offsets for tuplestore trim. +-- Lookback: how far back from currentpos (PREV, LAST, compound PREV_LAST/NEXT_LAST). +-- Lookahead: how far forward from match_start (FIRST, compound PREV_FIRST/NEXT_FIRST). 
+-- ============================================================ +-- Prepare statement for host variable offset test below +PREPARE rpr_nav_offset_prep(int8) AS +SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS v > PREV(v, $1) +); +-- No navigation function: offset 0 +EXPLAIN (COSTS OFF) SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS v > 0 +); + QUERY PLAN +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: 0 + -> Function Scan on generate_series s +(5 rows) + +-- NEXT only: no backward navigation, offset 0 +EXPLAIN (COSTS OFF) SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS v < NEXT(v) +); + QUERY PLAN +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: 0 + -> Function Scan on generate_series s +(5 rows) + +-- PREV(v): implicit offset 1 +EXPLAIN (COSTS OFF) SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS v > PREV(v) +); + QUERY PLAN +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: 1 + -> Function Scan on generate_series s +(5 rows) + +-- PREV(v, 3): explicit constant offset 3 +EXPLAIN (COSTS OFF) SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS v > PREV(v, 3) +); + QUERY 
PLAN +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: 3 + -> Function Scan on generate_series s +(5 rows) + +-- Two PREV with different offsets: max(1, 5) = 5 +EXPLAIN (COSTS OFF) SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS PREV(v, 1) < v AND PREV(v, 5) < v +); + QUERY PLAN +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: 5 + -> Function Scan on generate_series s +(5 rows) + +-- Host variable offset: custom plan resolves $1=2 to constant 2 +EXPLAIN (COSTS OFF) EXECUTE rpr_nav_offset_prep(2); + QUERY PLAN +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: 2 + -> Function Scan on generate_series s +(5 rows) + +-- Force generic plan: offset becomes "runtime" (Param node) +SET plan_cache_mode = force_generic_plan; +EXPLAIN (COSTS OFF) EXECUTE rpr_nav_offset_prep(2); + QUERY PLAN +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: runtime + -> Function Scan on generate_series s +(5 rows) + +RESET plan_cache_mode; +DEALLOCATE rpr_nav_offset_prep; +-- FIRST(v): retain all (references match_start row) +EXPLAIN (COSTS OFF) SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS v > FIRST(v) +); + QUERY PLAN +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + 
Pattern: a+ + Nav Mark Lookback: 0 + Nav Mark Lookahead: 0 + -> Function Scan on generate_series s +(6 rows) + +-- LAST(v, 1): backward reach 1, same as PREV(v, 1) +EXPLAIN (COSTS OFF) SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS LAST(v, 1) > 0 +); + QUERY PLAN +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+ + Nav Mark Lookback: 1 + -> Function Scan on generate_series s +(5 rows) + +-- LAST(v) without offset + PREV(v): no match_start dependency, offset 1 +EXPLAIN (COSTS OFF) SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS LAST(v) > PREV(v) +); + QUERY PLAN +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: 1 + -> Function Scan on generate_series s +(5 rows) + +-- Compound PREV(FIRST(val, 1), 2): lookback from match_start, firstOffset = 1-2 = -1 +EXPLAIN (COSTS OFF) SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS PREV(FIRST(v, 1), 2) > 0 +); + QUERY PLAN +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+ + Nav Mark Lookback: 0 + Nav Mark Lookahead: -1 + -> Function Scan on generate_series s +(6 rows) + +-- Compound NEXT(FIRST(val), 3): firstOffset = 0+3 = 3 +EXPLAIN (COSTS OFF) SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS NEXT(FIRST(v), 3) > 0 +); + QUERY PLAN 
+------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+ + Nav Mark Lookback: 0 + Nav Mark Lookahead: 3 + -> Function Scan on generate_series s +(6 rows) + +-- Compound PREV(LAST(val), 2): lookback = 0+2 = 2 +EXPLAIN (COSTS OFF) SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS PREV(LAST(v), 2) > 0 +); + QUERY PLAN +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: 2 + -> Function Scan on generate_series s +(5 rows) + +-- Compound NEXT(LAST(val, 1), 3): lookback = max(1-3, 0) = 0 +EXPLAIN (COSTS OFF) SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS NEXT(LAST(v, 1), 3) > 0 +); + QUERY PLAN +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+ + Nav Mark Lookback: 0 + -> Function Scan on generate_series s +(5 rows) + +-- Compound PREV(LAST(val, N), M): constant near-overflow (N+M just fits int64) +EXPLAIN (COSTS OFF) SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS PREV(LAST(v, 4611686018427387903), 4611686018427387903) IS NOT NULL +); + QUERY PLAN +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+ + Nav Mark Lookback: 9223372036854775806 + -> Function Scan on generate_series s +(5 rows) + +-- Compound PREV(LAST(val, N), M): constant overflow -> retain all +EXPLAIN (COSTS OFF) SELECT count(*) OVER w +FROM 
generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS PREV(LAST(v, 4611686018427387904), 4611686018427387904) IS NOT NULL +); + QUERY PLAN +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+ + Nav Mark Lookback: retain all + -> Function Scan on generate_series s +(5 rows) + +-- Compound NEXT(FIRST(val, N), M): constant lookahead overflow -> no trim impact +-- N + M overflows int64, but target is forward from match_start so it never +-- constrains trim. Lookahead remains at default (0). +EXPLAIN (COSTS OFF) SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS NEXT(FIRST(v, 4611686018427387904), 4611686018427387904) IS NOT NULL +); + QUERY PLAN +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+ + Nav Mark Lookback: 0 + Nav Mark Lookahead: 0 + -> Function Scan on generate_series s +(6 rows) + +-- Compound PREV(LAST(val, $1), $2): parameter lookback overflow -> retain all +-- EXPLAIN shows "runtime" (plan-level); EXPLAIN ANALYZE shows "retain all" +-- (executor-resolved). 
+PREPARE test_overflow_lookback(int8, int8) AS +SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS PREV(LAST(v, $1), $2) IS NOT NULL +); +SET plan_cache_mode = force_generic_plan; +EXPLAIN (COSTS OFF) EXECUTE test_overflow_lookback(4611686018427387904, 4611686018427387904); + QUERY PLAN +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+ + Nav Mark Lookback: runtime + -> Function Scan on generate_series s +(5 rows) + +EXPLAIN (COSTS OFF, ANALYZE, TIMING OFF, SUMMARY OFF) + EXECUTE test_overflow_lookback(4611686018427387904, 4611686018427387904); + QUERY PLAN +---------------------------------------------------------------------- + WindowAgg (actual rows=10.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+ + Nav Mark Lookback: retain all + Storage: Memory Maximum Storage: 17kB + NFA States: 1 peak, 11 total, 0 merged + NFA Contexts: 2 peak, 11 total, 10 pruned + NFA: 0 matched, 0 mismatched + -> Function Scan on generate_series s (actual rows=10.00 loops=1) +(9 rows) + +RESET plan_cache_mode; +DEALLOCATE test_overflow_lookback; +-- Compound NEXT(FIRST(val, $1), $2): parameter lookahead overflow -> no trim impact +PREPARE test_overflow_lookahead(int8, int8) AS +SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS NEXT(FIRST(v, $1), $2) IS NOT NULL +); +SET plan_cache_mode = force_generic_plan; +EXPLAIN (COSTS OFF, ANALYZE, TIMING OFF, SUMMARY OFF) + EXECUTE test_overflow_lookahead(4611686018427387904, 4611686018427387904); + QUERY PLAN +---------------------------------------------------------------------- + WindowAgg (actual rows=10.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+ 
+ Nav Mark Lookback: 0 + Nav Mark Lookahead: 0 + Storage: Memory Maximum Storage: 17kB + NFA States: 1 peak, 11 total, 0 merged + NFA Contexts: 2 peak, 11 total, 10 pruned + NFA: 0 matched, 0 mismatched + -> Function Scan on generate_series s (actual rows=10.00 loops=1) +(10 rows) + +RESET plan_cache_mode; +DEALLOCATE test_overflow_lookahead; +-- PREV(v) + PREV(v, $1): NEEDS_EVAL path must account for implicit lookback=1 +-- Previously, eval_nav_max_offset_walker skipped PREV(v) when offset_arg was +-- NULL, causing maxOffset=0 when $1=0, which would trim the row needed by +-- PREV(v). Verify this executes without "cannot fetch row before mark" error. +PREPARE test_prev_implicit_offset(int8) AS +SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS PREV(v) IS NOT NULL AND PREV(v, $1) IS NOT NULL +); +EXECUTE test_prev_implicit_offset(0); + count +------- + 0 + 9 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 +(10 rows) + +DEALLOCATE test_prev_implicit_offset; +-- Runtime error: negative offset at execution time +PREPARE test_runtime_neg_offset(int8) AS +SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS PREV(v, $1) IS NOT NULL +); +EXECUTE test_runtime_neg_offset(-1); +ERROR: row pattern navigation offset must not be negative +DEALLOCATE test_runtime_neg_offset; +-- Runtime error: null offset at execution time +PREPARE test_runtime_null_offset(int8) AS +SELECT count(*) OVER w +FROM generate_series(1,10) s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS PREV(v, $1) IS NOT NULL +); +EXECUTE test_runtime_null_offset(NULL); +ERROR: row pattern navigation offset must not be null +DEALLOCATE test_runtime_null_offset; diff --git a/src/test/regress/expected/rpr_integration.out b/src/test/regress/expected/rpr_integration.out new file mode 100644 
index 00000000000..0cc79b75601 --- /dev/null +++ b/src/test/regress/expected/rpr_integration.out @@ -0,0 +1,1518 @@ +-- ============================================================ +-- RPR Integration Tests +-- Planner optimization interaction tests for Row Pattern Recognition +-- ============================================================ +-- +-- Verifies that each planner optimization correctly handles RPR windows. +-- Even if individual optimizations are tested elsewhere, this file +-- provides a single checkpoint for all planner/RPR interactions. +-- +-- A. Planner Optimization Protection Tests +-- A1. Frame optimization bypass +-- A2. Run condition pushdown bypass +-- A3. Window dedup prevention (RPR vs non-RPR) +-- A4. Window dedup prevention (same PATTERN, different DEFINE) +-- A5. Unused window removal prevention +-- A6. Inverse transition bypass +-- A7. Cost estimation RPR awareness +-- A8. Subquery flattening prevention +-- A9. DEFINE expression non-propagation +-- A10. RPR + LIMIT +-- +-- B. Integration Scenario Tests +-- B1. RPR + CTE +-- B2. RPR + JOIN +-- B3. RPR + Set operations +-- B4. RPR + Prepared statements +-- B5. RPR + Partitioned table +-- B6. RPR + LATERAL +-- B7. RPR + Recursive CTE +-- B8. RPR + Incremental sort +-- B9. RPR + Volatile function in DEFINE +-- B10. RPR + Correlated subquery +-- +CREATE TABLE rpr_integ (id INT, val INT); +INSERT INTO rpr_integ VALUES + (1, 10), (2, 20), (3, 15), (4, 25), (5, 5), + (6, 30), (7, 35), (8, 20), (9, 40), (10, 45); +-- ============================================================ +-- A1. Frame optimization bypass +-- ============================================================ +-- Verify that optimize_window_clauses() does not apply frame +-- optimization to RPR windows. Both queries below use the same input +-- frame (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) with +-- row_number(), whose prosupport handles +-- SupportRequestOptimizeWindowClause and triggers frame rewriting. 
+-- In the non-RPR baseline the planner rewrites the frame to ROWS +-- UNBOUNDED PRECEDING, while in the RPR case the guard in +-- optimize_window_clauses() blocks the rewrite and the frame is +-- preserved as specified. +-- Non-RPR baseline: the planner rewrites the frame to ROWS UNBOUNDED PRECEDING. +EXPLAIN (COSTS OFF) +SELECT row_number() OVER w FROM rpr_integ +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING); + QUERY PLAN +------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS UNBOUNDED PRECEDING) + -> Sort + Sort Key: id + -> Seq Scan on rpr_integ +(5 rows) + +-- RPR case: the frame is preserved as specified. +EXPLAIN (COSTS OFF) +SELECT row_number() OVER w FROM rpr_integ +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> Sort + Sort Key: id + -> Seq Scan on rpr_integ +(7 rows) + +-- ============================================================ +-- A2. Run condition pushdown bypass +-- ============================================================ +-- Verify that find_window_run_conditions() does not push a monotonic +-- filter down as a Run Condition on RPR windows. RPR match counts are +-- determined by pattern matching rather than by a monotonic +-- accumulation over the frame, so a filter such as "cnt > 0" cannot be +-- used to stop evaluating the window function early. +-- Non-RPR baseline: the filter is expected to appear as a Run Condition. 
+EXPLAIN (COSTS OFF) +SELECT * FROM ( + SELECT count(*) OVER w AS cnt + FROM rpr_integ + WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +) t WHERE cnt > 0; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Subquery Scan on t + -> WindowAgg + Window: w AS (ORDER BY rpr_integ.id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Run Condition: (count(*) OVER w > 0) + -> Sort + Sort Key: rpr_integ.id + -> Seq Scan on rpr_integ +(7 rows) + +-- RPR case: the filter must appear as a Filter above the WindowAgg, +-- not as a Run Condition. +EXPLAIN (COSTS OFF) +SELECT * FROM ( + SELECT count(*) OVER w AS cnt + FROM rpr_integ + WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +) t WHERE cnt > 0; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Subquery Scan on t + Filter: (t.cnt > 0) + -> WindowAgg + Window: w AS (ORDER BY rpr_integ.id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> Sort + Sort Key: rpr_integ.id + -> Seq Scan on rpr_integ +(9 rows) + +-- Verify that the RPR query still returns every row whose match count is +-- greater than zero, confirming the filter is evaluated above the +-- WindowAgg rather than cutting off pattern matching prematurely. +SELECT * FROM ( + SELECT id, val, count(*) OVER w AS cnt + FROM rpr_integ + WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +) t WHERE cnt > 0 +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 3 | 15 | 2 + 5 | 5 | 3 + 8 | 20 | 3 +(4 rows) + +-- ============================================================ +-- A3. 
Window dedup prevention (RPR vs non-RPR) +-- ============================================================ +-- Verify that PostgreSQL does not merge an RPR window with a non-RPR +-- window even when both share the same ORDER BY and frame +-- specification. RPR pattern matching produces results that are +-- semantically different from a plain frame-based aggregate, so the +-- two windows must remain as separate WindowAgg nodes. Inline window +-- specs are used throughout this section because only inline windows +-- are subject to the dedup path; distinct named windows are always +-- kept separate regardless of equivalence. +-- Non-RPR baseline: two inline windows with identical spec are +-- deduped by the planner into a single WindowAgg node, confirming +-- that the dedup path is active for non-RPR windows. +EXPLAIN (COSTS OFF) +SELECT + count(*) OVER (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS cnt, + sum(val) OVER (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS total +FROM rpr_integ; + QUERY PLAN +-------------------------------------------------------------------------------- + WindowAgg + Window: w1 AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + -> Sort + Sort Key: id + -> Seq Scan on rpr_integ +(5 rows) + +-- An inline RPR window and an inline non-RPR window share the same +-- ORDER BY and frame but must remain as distinct WindowAgg nodes. 
+EXPLAIN (COSTS OFF) +SELECT + count(*) OVER (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) AS rpr_cnt, + count(*) OVER (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS normal_cnt +FROM rpr_integ; + QUERY PLAN +-------------------------------------------------------------------------------------- + WindowAgg + Window: w2 AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + -> WindowAgg + Window: w1 AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> Sort + Sort Key: id + -> Seq Scan on rpr_integ +(9 rows) + +-- Verify that the two windows return independent counts per row, +-- confirming they were not merged into a single WindowAgg. +SELECT + id, val, + count(*) OVER (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) AS rpr_cnt, + count(*) OVER (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS normal_cnt +FROM rpr_integ +ORDER BY id; + id | val | rpr_cnt | normal_cnt +----+-----+---------+------------ + 1 | 10 | 2 | 10 + 2 | 20 | 0 | 9 + 3 | 15 | 2 | 8 + 4 | 25 | 0 | 7 + 5 | 5 | 3 | 6 + 6 | 30 | 0 | 5 + 7 | 35 | 0 | 4 + 8 | 20 | 3 | 3 + 9 | 40 | 0 | 2 + 10 | 45 | 0 | 1 +(10 rows) + +-- ============================================================ +-- A4. Window dedup prevention (same PATTERN, different DEFINE) +-- ============================================================ +-- Verify that inline-window dedup does not merge two RPR windows +-- that share the same PATTERN structure but have different DEFINE +-- conditions. Even though the ORDER BY, frame, and PATTERN coincide, +-- the differing DEFINE expressions classify rows differently and +-- must therefore yield two separate WindowAgg nodes. Inline specs +-- are used here because dedup only applies to inline windows. 
+-- Baseline: two inline RPR windows that are structurally identical +-- (same ORDER BY, frame, PATTERN, and DEFINE) are deduped by the +-- parser into a single WindowAgg node, confirming that parser-level +-- dedup is active for RPR windows whose DEFINE matches. +EXPLAIN (COSTS OFF) +SELECT + count(*) OVER (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) AS cnt, + sum(val) OVER (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) AS total +FROM rpr_integ; + QUERY PLAN +-------------------------------------------------------------------------------- + WindowAgg + Window: w1 AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> Sort + Sort Key: id + -> Seq Scan on rpr_integ +(7 rows) + +-- Two inline RPR windows with the same PATTERN but opposite DEFINE +-- conditions must remain as separate WindowAgg nodes. +EXPLAIN (COSTS OFF) +SELECT + count(*) OVER (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) AS cnt_up, + count(*) OVER (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val < PREV(val)) AS cnt_down +FROM rpr_integ; + QUERY PLAN +-------------------------------------------------------------------------------------- + WindowAgg + Window: w2 AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> WindowAgg + Window: w1 AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> Sort + Sort Key: id + -> Seq Scan on rpr_integ +(11 rows) + +-- Verify that the two windows return different counts per row, +-- confirming the DEFINE conditions were not collapsed by dedup. 
+SELECT + id, val, + count(*) OVER (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) AS cnt_up, + count(*) OVER (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val < PREV(val)) AS cnt_down +FROM rpr_integ +ORDER BY id; + id | val | cnt_up | cnt_down +----+-----+--------+---------- + 1 | 10 | 2 | 0 + 2 | 20 | 0 | 2 + 3 | 15 | 2 | 0 + 4 | 25 | 0 | 2 + 5 | 5 | 3 | 0 + 6 | 30 | 0 | 0 + 7 | 35 | 0 | 2 + 8 | 20 | 3 | 0 + 9 | 40 | 0 | 0 + 10 | 45 | 0 | 0 +(10 rows) + +-- ============================================================ +-- A5. Unused window removal prevention +-- ============================================================ +-- Verify that remove_unused_subquery_outputs() does not drop an RPR +-- window function even when the outer query does not reference its +-- result. The RPR WindowAgg node is responsible for performing pattern +-- matching, so removing the window function would silently skip the +-- pattern match even though the surrounding query still depends on +-- RPR semantics. +-- The outer query ignores the per-row window result, yet pattern +-- matching must still execute. The plan must still contain a +-- WindowAgg node below the outer Aggregate; if the window were +-- removed, only Aggregate + Seq Scan would appear. 
+EXPLAIN (COSTS OFF) +SELECT count(*) FROM ( + SELECT count(*) OVER w FROM rpr_integ + WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > PREV(val)) +) t; + QUERY PLAN +------------------------------------------------------------------------- + Aggregate + -> WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: 1 + -> Seq Scan on rpr_integ +(6 rows) + +SELECT count(*) FROM ( + SELECT count(*) OVER w FROM rpr_integ + WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > PREV(val)) +) t; + count +------- + 10 +(1 row) + +-- The DEFINE expression references PREV(val), so the window must be +-- preserved even if the outer query only aggregates over the count. +-- The plan must still contain a WindowAgg with the PATTERN/DEFINE +-- intact. +EXPLAIN (COSTS OFF) +SELECT count(*), sum(c) FROM ( + SELECT count(*) OVER w AS c FROM rpr_integ + WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > PREV(val)) +) t; + QUERY PLAN +------------------------------------------------------------------------- + Aggregate + -> WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: 1 + -> Seq Scan on rpr_integ +(6 rows) + +SELECT count(*), sum(c) FROM ( + SELECT count(*) OVER w AS c FROM rpr_integ + WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS val > PREV(val)) +) t; + count | sum +-------+----- + 10 | 6 +(1 row) + +-- The DEFINE expression contains no navigation, but the RPR window +-- must still be preserved because the match structure itself affects +-- the count. The plan must retain the WindowAgg. 
+EXPLAIN (COSTS OFF) +SELECT count(*), sum(c) FROM ( + SELECT count(*) OVER w AS c FROM rpr_integ + WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS TRUE) +) t; + QUERY PLAN +------------------------------------------------------------------------- + Aggregate + -> WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+" + Nav Mark Lookback: 0 + -> Seq Scan on rpr_integ +(6 rows) + +SELECT count(*), sum(c) FROM ( + SELECT count(*) OVER w AS c FROM rpr_integ + WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+) + DEFINE A AS TRUE) +) t; + count | sum +-------+----- + 10 | 10 +(1 row) + +-- XXX: "val" is non-resjunk in the subquery output and is not +-- referenced by the outer query. Without a guard, +-- remove_unused_subquery_outputs() would replace it with NULL in +-- the subquery output, and that replacement propagates to the +-- scan's targetlist -- DEFINE would then evaluate with NULL +-- inputs. The targetlist has no way to distinguish "exposed to +-- the outer query" from "referenced only by DEFINE", so the +-- optimization cannot be applied selectively. The column guard +-- in allpaths.c blocks this replacement for any column referenced +-- by an RPR DEFINE clause, keeping the WindowAgg with DEFINE +-- active in the plan. 
+EXPLAIN (COSTS OFF) +SELECT count(*) FROM ( + SELECT val, count(*) OVER w FROM rpr_integ + WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +) t; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Aggregate + -> WindowAgg + Window: w AS (ORDER BY rpr_integ.id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> Sort + Sort Key: rpr_integ.id + -> Seq Scan on rpr_integ +(8 rows) + +SELECT count(*) FROM ( + SELECT val, count(*) OVER w FROM rpr_integ + WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +) t; + count +------- + 10 +(1 row) + +-- ============================================================ +-- A6. Inverse transition bypass +-- ============================================================ +-- Verify that RPR windows do not use the moving aggregate (inverse +-- transition) optimization. Moving aggregates maintain state by +-- adding arriving rows and subtracting leaving rows, but an RPR +-- reduced frame is not a sliding window; the set of rows included in +-- the frame is determined by pattern matching and cannot be derived +-- incrementally from the previous frame. +-- sum() would normally be eligible for the moving aggregate +-- optimization; under RPR it must be computed from scratch over each +-- reduced frame, and the returned values must match the pattern. +-- Note: inverse-transition selection is not exposed in the plan, so +-- there is no direct EXPLAIN assertion for it. The structural +-- guarantee is that RPR uses its own navigation mark, distinct from +-- the moving-aggregate mark, so the inverse-transition path is +-- never reached on the RPR side. 
This test verifies that +-- separation indirectly: if inverse transition leaked into the RPR +-- path, state would mix across match boundaries and pattern_sum +-- would diverge from the expected output, failing the regression. +SELECT id, val, + sum(val) OVER w AS pattern_sum +FROM rpr_integ +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +ORDER BY id; + id | val | pattern_sum +----+-----+------------- + 1 | 10 | 30 + 2 | 20 | + 3 | 15 | 40 + 4 | 25 | + 5 | 5 | 70 + 6 | 30 | + 7 | 35 | + 8 | 20 | 105 + 9 | 40 | + 10 | 45 | +(10 rows) + +-- ============================================================ +-- A7. Cost estimation RPR awareness +-- ============================================================ +-- cost_windowagg() must account for DEFINE expression evaluation cost. +-- Verify RPR WindowAgg cost > non-RPR WindowAgg cost. +CREATE FUNCTION get_windowagg_cost(query text) RETURNS numeric AS $$ +DECLARE + plan json; + cost numeric; +BEGIN + EXECUTE 'EXPLAIN (FORMAT JSON) ' || query INTO plan; + cost := (plan->0->'Plan'->>'Total Cost')::numeric; + RETURN cost; +END; +$$ LANGUAGE plpgsql; +SELECT get_windowagg_cost( + 'SELECT count(*) OVER w FROM rpr_integ + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+ C+) DEFINE B AS val > PREV(val), C AS val < PREV(val))') + > + get_windowagg_cost( + 'SELECT count(*) OVER w FROM rpr_integ + WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)') + AS rpr_cost_is_higher; + rpr_cost_is_higher +-------------------- + t +(1 row) + +DROP FUNCTION get_windowagg_cost(text); +-- ============================================================ +-- A8. Subquery flattening prevention +-- ============================================================ +-- Verify that a subquery containing an RPR window is not flattened +-- into the outer query. 
is_simple_subquery() already blocks pullup +-- for subqueries with window functions in general; this test confirms +-- the rule continues to apply to RPR windows, so EXPLAIN must still +-- show a Subquery Scan above the RPR WindowAgg. +EXPLAIN (COSTS OFF) +SELECT * FROM ( + SELECT id, val, count(*) OVER w AS cnt + FROM rpr_integ + WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +) sub +WHERE cnt > 0; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Subquery Scan on sub + Filter: (sub.cnt > 0) + -> WindowAgg + Window: w AS (ORDER BY rpr_integ.id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> Sort + Sort Key: rpr_integ.id + -> Seq Scan on rpr_integ +(9 rows) + +-- ============================================================ +-- A9. DEFINE expression non-propagation +-- ============================================================ +-- Verify that DEFINE expressions are not propagated into the +-- targetlist of any upper WindowAgg node. Only the column references +-- consumed by DEFINE should be passed up; the full DEFINE expression +-- is meaningful only inside the RPR WindowAgg that owns it. +-- EXPLAIN VERBOSE is therefore expected to show a clean targetlist on +-- the outer WindowAgg, with no DEFINE-derived expression leaking in. +-- Note: columns referenced by DEFINE (e.g., "val") may appear as +-- resjunk entries in upper WindowAgg targetlists -- that is a +-- harmless byproduct of the column guard's broad scope and does not +-- affect client output. The claim here is limited to the full +-- DEFINE boolean expression. 
+EXPLAIN (VERBOSE, COSTS OFF) +SELECT + count(*) OVER w_rpr AS rpr_cnt, + count(*) OVER w_normal AS normal_cnt +FROM rpr_integ +WINDOW + w_rpr AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)), + w_normal AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING); + QUERY PLAN +--------------------------------------------------------------------------------------------------- + WindowAgg + Output: (count(*) OVER w_rpr), count(*) OVER w_normal, id, val + Window: w_normal AS (ORDER BY rpr_integ.id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + -> WindowAgg + Output: id, val, count(*) OVER w_rpr + Window: w_rpr AS (ORDER BY rpr_integ.id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> Sort + Output: id, val + Sort Key: rpr_integ.id + -> Seq Scan on public.rpr_integ + Output: id, val +(13 rows) + +-- Executing the same query shows the client result is limited to +-- the two projected columns; "id" and "val" that appeared in the +-- upper WindowAgg Output line are resjunk-only and do not reach +-- the client. +SELECT + count(*) OVER w_rpr AS rpr_cnt, + count(*) OVER w_normal AS normal_cnt +FROM rpr_integ +WINDOW + w_rpr AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)), + w_normal AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +ORDER BY rpr_cnt DESC, normal_cnt DESC; + rpr_cnt | normal_cnt +---------+------------ + 3 | 6 + 3 | 3 + 2 | 10 + 2 | 8 + 0 | 9 + 0 | 7 + 0 | 5 + 0 | 4 + 0 | 2 + 0 | 1 +(10 rows) + +-- ============================================================ +-- A10. RPR + LIMIT +-- ============================================================ +-- LIMIT must not interfere with RPR pattern matching. 
The Limit +-- node must sit above the WindowAgg so that pattern matching runs +-- on the full partition first; the result is then a prefix of the +-- un-LIMITed output. Pushing Limit below the WindowAgg would +-- truncate input before matching and silently drop valid matches. +EXPLAIN (COSTS OFF) +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_integ +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +LIMIT 5; + QUERY PLAN +------------------------------------------------------------------------------------- + Limit + -> WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> Sort + Sort Key: id + -> Seq Scan on rpr_integ +(8 rows) + +-- Reference: un-LIMITed result against which the LIMIT 5 result is +-- compared. +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_integ +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 15 | 2 + 4 | 25 | 0 + 5 | 5 | 3 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 20 | 3 + 9 | 40 | 0 + 10 | 45 | 0 +(10 rows) + +-- LIMIT 5 case; the first five rows must match the reference above. +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_integ +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +LIMIT 5; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 15 | 2 + 4 | 25 | 0 + 5 | 5 | 3 +(5 rows) + +-- ============================================================ +-- B1. RPR + CTE +-- ============================================================ +-- Verify that an RPR window embedded inside a CTE behaves the same as +-- a direct RPR query: +-- (1) A single-reference CTE is inlined by the planner and yields +-- per-row results identical to the direct RPR query. 
+-- (2) A multi-reference CTE is materialized (CTE Scan appears in +-- the plan) so pattern matching runs once, and every reference +-- observes the same match results. +-- Baseline: direct RPR produces the per-row reference output. +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_integ +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 15 | 2 + 4 | 25 | 0 + 5 | 5 | 3 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 20 | 3 + 9 | 40 | 0 + 10 | 45 | 0 +(10 rows) + +-- Single-reference CTE: plan has no "CTE rpr_result" scope, showing +-- the CTE was inlined into the surrounding query. +EXPLAIN (COSTS OFF) +WITH rpr_result AS ( + SELECT id, val, count(*) OVER w AS cnt + FROM rpr_integ + WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +) +SELECT id, val, cnt FROM rpr_result ORDER BY id; + QUERY PLAN +----------------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY rpr_integ.id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> Sort + Sort Key: rpr_integ.id + -> Seq Scan on rpr_integ +(7 rows) + +-- Result must match the baseline row-for-row. +WITH rpr_result AS ( + SELECT id, val, count(*) OVER w AS cnt + FROM rpr_integ + WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +) +SELECT id, val, cnt FROM rpr_result ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 15 | 2 + 4 | 25 | 0 + 5 | 5 | 3 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 20 | 3 + 9 | 40 | 0 + 10 | 45 | 0 +(10 rows) + +-- Multi-reference CTE (self-join): plan has a "CTE rpr_result" scope +-- and CTE Scan nodes on both sides, showing the CTE was materialized +-- and pattern matching ran only once. 
+EXPLAIN (COSTS OFF) +WITH rpr_result AS ( + SELECT id, val, count(*) OVER w AS cnt + FROM rpr_integ + WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +) +SELECT r1.id, r1.cnt +FROM rpr_result r1 +JOIN rpr_result r2 ON r1.id = r2.id AND r1.cnt = r2.cnt +WHERE r1.cnt > 0 +ORDER BY r1.id; + QUERY PLAN +------------------------------------------------------------------------------------------------- + Merge Join + Merge Cond: ((r2.id = r1.id) AND (r2.cnt = r1.cnt)) + CTE rpr_result + -> WindowAgg + Window: w AS (ORDER BY rpr_integ.id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> Sort + Sort Key: rpr_integ.id + -> Seq Scan on rpr_integ + -> Incremental Sort + Sort Key: r2.id, r2.cnt + Presorted Key: r2.id + -> CTE Scan on rpr_result r2 + -> Sort + Sort Key: r1.id, r1.cnt + -> CTE Scan on rpr_result r1 + Filter: (cnt > 0) +(18 rows) + +-- Result: both references see the same match counts, so the self-join +-- preserves all matched rows from the baseline. +WITH rpr_result AS ( + SELECT id, val, count(*) OVER w AS cnt + FROM rpr_integ + WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +) +SELECT r1.id, r1.cnt +FROM rpr_result r1 +JOIN rpr_result r2 ON r1.id = r2.id AND r1.cnt = r2.cnt +WHERE r1.cnt > 0 +ORDER BY r1.id; + id | cnt +----+----- + 1 | 2 + 3 | 2 + 5 | 3 + 8 | 3 +(4 rows) + +-- ============================================================ +-- B2. RPR + JOIN +-- ============================================================ +-- Verify that an RPR subquery can be joined with another relation. +-- Two aspects are checked against a non-RPR baseline: +-- (1) Flattening: a non-RPR subquery is pulled up by the planner +-- (no Subquery Scan in the plan); an RPR subquery is kept +-- un-flattened (Subquery Scan above WindowAgg). 
+-- (2) Join correctness: the join aligns each RPR match row with +-- the dimension-table row on the same key. +CREATE TABLE rpr_integ2 (id INT, label TEXT); +INSERT INTO rpr_integ2 VALUES + (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'), + (6, 'f'), (7, 'g'), (8, 'h'), (9, 'i'), (10, 'j'); +-- Baseline: a non-RPR subquery is flattened by the planner. No +-- Subquery Scan node appears; the inner SELECT is merged into the +-- outer join. +EXPLAIN (COSTS OFF) +SELECT r.id, r.val, j.label +FROM (SELECT id, val FROM rpr_integ) r +JOIN rpr_integ2 j ON r.id = j.id +ORDER BY r.id; + QUERY PLAN +-------------------------------------- + Merge Join + Merge Cond: (j.id = rpr_integ.id) + -> Sort + Sort Key: j.id + -> Seq Scan on rpr_integ2 j + -> Sort + Sort Key: rpr_integ.id + -> Seq Scan on rpr_integ +(8 rows) + +-- RPR subquery JOIN: the Subquery Scan is preserved above the +-- WindowAgg, confirming the RPR subquery is not flattened. +EXPLAIN (COSTS OFF) +SELECT r.id, r.cnt, j.label +FROM ( + SELECT id, count(*) OVER w AS cnt + FROM rpr_integ + WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +) r +JOIN rpr_integ2 j ON r.id = j.id +WHERE r.cnt > 0 +ORDER BY r.id; + QUERY PLAN +----------------------------------------------------------------------------------------------------- + Merge Join + Merge Cond: (r.id = j.id) + -> Subquery Scan on r + Filter: (r.cnt > 0) + -> WindowAgg + Window: w AS (ORDER BY rpr_integ.id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> Sort + Sort Key: rpr_integ.id + -> Seq Scan on rpr_integ + -> Sort + Sort Key: j.id + -> Seq Scan on rpr_integ2 j +(14 rows) + +-- Result: matched RPR rows align with dimension rows on id, showing +-- the join correctly pairs per-row match counts with their labels. 
+SELECT r.id, r.cnt, j.label +FROM ( + SELECT id, count(*) OVER w AS cnt + FROM rpr_integ + WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +) r +JOIN rpr_integ2 j ON r.id = j.id +WHERE r.cnt > 0 +ORDER BY r.id; + id | cnt | label +----+-----+------- + 1 | 2 | a + 3 | 2 | c + 5 | 3 | e + 8 | 3 | h +(4 rows) + +-- ============================================================ +-- B3. RPR + Set operations +-- ============================================================ +-- Verify that RPR results combine correctly with non-RPR results +-- under a UNION ALL. The plan must show an Append node with two +-- independent child plans: the RPR branch with Pattern/DEFINE active, +-- and the non-RPR branch with a plain WindowAgg. Each child scans +-- the base relation on its own and contributes its rows to the +-- unioned output. +-- Plan: Append with two independent children. The RPR branch has a +-- WindowAgg carrying Pattern/Nav Mark Lookback; the non-RPR branch +-- has a plain WindowAgg with no pattern metadata. 
+EXPLAIN (COSTS OFF) +SELECT id, cnt, 'rpr' AS source FROM ( + SELECT id, count(*) OVER w AS cnt + FROM rpr_integ + WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +) t WHERE cnt > 0 +UNION ALL +SELECT id, count(*) OVER (ORDER BY id) AS cnt, 'normal' AS source +FROM rpr_integ +ORDER BY source, id; + QUERY PLAN +----------------------------------------------------------------------------------------------------------- + Sort + Sort Key: ('rpr'::text), t.id + -> Append + -> Subquery Scan on t + Filter: (t.cnt > 0) + -> WindowAgg + Window: w AS (ORDER BY rpr_integ.id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> Sort + Sort Key: rpr_integ.id + -> Seq Scan on rpr_integ + -> WindowAgg + Window: w1 AS (ORDER BY rpr_integ_1.id) + -> Sort + Sort Key: rpr_integ_1.id + -> Seq Scan on rpr_integ rpr_integ_1 +(17 rows) + +-- Result: rows from both branches are present in the unioned output. +-- The RPR branch emits only matched rows (cnt > 0), while the +-- non-RPR branch emits all rows with its own count values. +SELECT id, cnt, 'rpr' AS source FROM ( + SELECT id, count(*) OVER w AS cnt + FROM rpr_integ + WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +) t WHERE cnt > 0 +UNION ALL +SELECT id, count(*) OVER (ORDER BY id) AS cnt, 'normal' AS source +FROM rpr_integ +ORDER BY source, id; + id | cnt | source +----+-----+-------- + 1 | 1 | normal + 2 | 2 | normal + 3 | 3 | normal + 4 | 4 | normal + 5 | 5 | normal + 6 | 6 | normal + 7 | 7 | normal + 8 | 8 | normal + 9 | 9 | normal + 10 | 10 | normal + 1 | 2 | rpr + 3 | 2 | rpr + 5 | 3 | rpr + 8 | 3 | rpr +(14 rows) + +-- ============================================================ +-- B4. 
RPR + Prepared statements +-- ============================================================ +-- Verify that RPR queries survive the prepared-statement path by +-- exercising both plancache modes with a parameter that feeds into +-- RPR's navigation offset (PREV(val, $1)). The parameter surfaces +-- the RPR-specific plancache difference: +-- - custom plan: "Nav Mark Lookback" is resolved to the literal +-- parameter value at plan time (e.g., "Nav Mark Lookback: 1"). +-- - generic plan: "Nav Mark Lookback" is deferred to execution and +-- appears as "Nav Mark Lookback: runtime" in the plan. +-- The result must be identical under both modes. +-- Register the prepared statement; DEFINE uses PREV(val, $1) so the +-- parameter reaches RPR's navigation machinery. +PREPARE rpr_prev(int) AS +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_integ +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val, $1)) +ORDER BY id; +-- Custom plan: Nav Mark Lookback resolved to the literal 1. +SET plan_cache_mode = force_custom_plan; +EXPLAIN (COSTS OFF) EXECUTE rpr_prev(1); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> Sort + Sort Key: id + -> Seq Scan on rpr_integ +(7 rows) + +EXECUTE rpr_prev(1); + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 15 | 2 + 4 | 25 | 0 + 5 | 5 | 3 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 20 | 3 + 9 | 40 | 0 + 10 | 45 | 0 +(10 rows) + +-- Generic plan: Nav Mark Lookback deferred to execution, shown as +-- "runtime" in the plan. Result must match the custom-plan result +-- exactly. 
+SET plan_cache_mode = force_generic_plan; +EXPLAIN (COSTS OFF) EXECUTE rpr_prev(1); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: runtime + -> Sort + Sort Key: id + -> Seq Scan on rpr_integ +(7 rows) + +EXECUTE rpr_prev(1); + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 15 | 2 + 4 | 25 | 0 + 5 | 5 | 3 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 20 | 3 + 9 | 40 | 0 + 10 | 45 | 0 +(10 rows) + +RESET plan_cache_mode; +DEALLOCATE rpr_prev; +-- ============================================================ +-- B5. RPR + Partitioned table +-- ============================================================ +-- Verify that RPR pattern matching works correctly when the source +-- relation is partitioned. The planner must gather rows from every +-- partition into a single ordered stream before RPR can see them, +-- because pattern matching is sequential across the entire +-- partition-by group and cannot be performed independently on each +-- table partition. +CREATE TABLE rpr_part (id INT, val INT) PARTITION BY RANGE (id); +CREATE TABLE rpr_part_1 PARTITION OF rpr_part FOR VALUES FROM (1) TO (6); +CREATE TABLE rpr_part_2 PARTITION OF rpr_part FOR VALUES FROM (6) TO (11); +INSERT INTO rpr_part SELECT id, val FROM rpr_integ; +-- Plan: partition scans are combined with Append (or Merge Append), +-- sorted into a single ordered stream, and fed into one WindowAgg +-- that performs RPR pattern matching across the combined stream. 
+EXPLAIN (COSTS OFF) +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_part +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +ORDER BY id; + QUERY PLAN +---------------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY rpr_part.id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> Sort + Sort Key: rpr_part.id + -> Append + -> Seq Scan on rpr_part_1 + -> Seq Scan on rpr_part_2 +(9 rows) + +-- Baseline: the same query against the non-partitioned rpr_integ +-- produces the per-row reference output. +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_integ +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 15 | 2 + 4 | 25 | 0 + 5 | 5 | 3 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 20 | 3 + 9 | 40 | 0 + 10 | 45 | 0 +(10 rows) + +-- Result against the partitioned table must match the baseline +-- row-for-row. +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_part +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 15 | 2 + 4 | 25 | 0 + 5 | 5 | 3 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 20 | 3 + 9 | 40 | 0 + 10 | 45 | 0 +(10 rows) + +DROP TABLE rpr_part; +-- ============================================================ +-- B6. RPR + LATERAL +-- ============================================================ +-- RPR inside a LATERAL subquery. Qualified column references from +-- the outer query are not yet supported in DEFINE, so this tests +-- the basic case where LATERAL provides the correlation filter +-- (WHERE id <= o.id) and DEFINE uses only local columns. 
The plan +-- must show a Nested Loop driving the outer relation into the inner +-- subquery scan, with the RPR WindowAgg re-executed for each outer +-- row and the correlation surfacing as a scan-level Filter on +-- "id <= o.id". +-- Plan: Nested Loop with the RPR WindowAgg in the inner leg, driven +-- by the filtered outer rows (o.id IN (5, 10)). +EXPLAIN (COSTS OFF) +SELECT o.id AS outer_id, r.id, r.cnt +FROM rpr_integ o, +LATERAL ( + SELECT id, count(*) OVER w AS cnt + FROM rpr_integ + WHERE id <= o.id + WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +) r +WHERE r.cnt > 0 AND o.id IN (5, 10) +ORDER BY o.id, r.id; + QUERY PLAN +----------------------------------------------------------------------------------------------------------- + Sort + Sort Key: o.id, r.id + -> Nested Loop + -> Seq Scan on rpr_integ o + Filter: (id = ANY ('{5,10}'::integer[])) + -> Subquery Scan on r + Filter: (r.cnt > 0) + -> WindowAgg + Window: w AS (ORDER BY rpr_integ.id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> Sort + Sort Key: rpr_integ.id + -> Seq Scan on rpr_integ + Filter: (id <= o.id) +(15 rows) + +-- Result: for each of the two outer ids (5 and 10), the LATERAL +-- subquery produces RPR match counts over the restricted input. +SELECT o.id AS outer_id, r.id, r.cnt +FROM rpr_integ o, +LATERAL ( + SELECT id, count(*) OVER w AS cnt + FROM rpr_integ + WHERE id <= o.id + WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +) r +WHERE r.cnt > 0 AND o.id IN (5, 10) +ORDER BY o.id, r.id; + outer_id | id | cnt +----------+----+----- + 5 | 1 | 2 + 5 | 3 | 2 + 10 | 1 | 2 + 10 | 3 | 2 + 10 | 5 | 3 + 10 | 8 | 3 +(6 rows) + +-- ============================================================ +-- B7. 
RPR + Recursive CTE +-- ============================================================ +-- Verify that an RPR window can appear inside the non-recursive +-- (base) leg of a recursive CTE. The plan must show the RPR +-- WindowAgg sitting under the Recursive Union as the base-leg +-- child, with the WorkTable Scan feeding the recursive leg above +-- it. This confirms that RPR output can seed a recursive CTE +-- (window functions cannot appear in the recursive leg itself, a +-- PostgreSQL restriction, so this is the natural place to exercise +-- "RPR under Recursive Union"). +-- +-- XXX: Whether this case falls under the ISO/IEC 9075-2 4.18.5 / +-- 6.17.5 prohibition is not something I can judge. If this case +-- is not prohibited, the open question is whether a query that +-- does trigger the prohibition can be constructed at all. +-- Whether to prohibit this case is left to the community. +-- Plan: Recursive Union with the RPR WindowAgg on the base leg and +-- the WorkTable Scan on the recursive leg. +EXPLAIN (COSTS OFF) +WITH RECURSIVE seq AS ( + SELECT id, val, count(*) OVER w AS cnt + FROM rpr_integ + WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) + UNION ALL + SELECT id + 100, val, cnt FROM seq WHERE id < 3 +) +SELECT id, val, cnt FROM seq ORDER BY id; + QUERY PLAN +------------------------------------------------------------------------------------------------------- + Sort + Sort Key: seq.id + CTE seq + -> Recursive Union + -> WindowAgg + Window: w AS (ORDER BY rpr_integ.id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> Sort + Sort Key: rpr_integ.id + -> Seq Scan on rpr_integ + -> WorkTable Scan on seq seq_1 + Filter: (id < 3) + -> CTE Scan on seq +(14 rows) + +-- Result: the base leg contributes the RPR match counts; the +-- recursive leg propagates those counts with shifted ids. 
+WITH RECURSIVE seq AS ( + SELECT id, val, count(*) OVER w AS cnt + FROM rpr_integ + WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) + UNION ALL + SELECT id + 100, val, cnt FROM seq WHERE id < 3 +) +SELECT id, val, cnt FROM seq ORDER BY id; + id | val | cnt +-----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 15 | 2 + 4 | 25 | 0 + 5 | 5 | 3 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 20 | 3 + 9 | 40 | 0 + 10 | 45 | 0 + 101 | 10 | 2 + 102 | 20 | 0 +(12 rows) + +-- ============================================================ +-- B8. RPR + Incremental sort +-- ============================================================ +-- Verify that RPR pattern matching works correctly when the input +-- to WindowAgg arrives via an incremental sort. The index on (id) +-- provides presorted input for the first ORDER BY key, so +-- "ORDER BY id, val" lets the planner use Incremental Sort to order +-- only on the second key. The plan must show Incremental Sort +-- below the RPR WindowAgg, and RPR must produce the same per-row +-- match counts as it would with a plain Sort. +CREATE INDEX rpr_integ_id_idx ON rpr_integ (id); +SET enable_seqscan = off; +-- Plan: RPR WindowAgg above an Incremental Sort above an Index Scan. +-- The Incremental Sort declares "Presorted Key: id" and sorts only +-- on val within each id group. 
+EXPLAIN (COSTS OFF) +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_integ +WINDOW w AS (ORDER BY id, val + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)); + QUERY PLAN +------------------------------------------------------------------------------------ + WindowAgg + Window: w AS (ORDER BY id, val ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> Incremental Sort + Sort Key: id, val + Presorted Key: id + -> Index Scan using rpr_integ_id_idx on rpr_integ +(8 rows) + +-- Result: RPR over the incrementally sorted stream produces match +-- counts per row. +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_integ +WINDOW w AS (ORDER BY id, val + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) +ORDER BY id, val; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 15 | 2 + 4 | 25 | 0 + 5 | 5 | 3 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 20 | 3 + 9 | 40 | 0 + 10 | 45 | 0 +(10 rows) + +RESET enable_seqscan; +DROP INDEX rpr_integ_id_idx; +-- ============================================================ +-- B9. RPR + Volatile function in DEFINE +-- ============================================================ +-- Records the current behaviour: DEFINE today accepts volatile +-- functions such as random() and the query runs to completion. +-- To keep the expected output deterministic the predicate uses +-- "random() >= 0.0", which is structurally equivalent to TRUE and +-- therefore does not perturb the match result. The interesting +-- property is that volatile invocation does not crash or short- +-- circuit pattern matching. +-- +-- XXX: volatile functions in DEFINE are slated to be rejected at +-- parse time. Under RPR's NFA engine the same row's DEFINE +-- predicate may be evaluated multiple times (backtracking, +-- PREV/NEXT navigation), so a truly volatile result would make +-- pattern matching non-deterministic. 
When the prohibition lands, +-- this test must be replaced with an error-case test that expects +-- random() in DEFINE to be rejected. +SELECT id, val, count(*) OVER w AS cnt +FROM rpr_integ +WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val) AND random() >= 0.0) +ORDER BY id; + id | val | cnt +----+-----+----- + 1 | 10 | 2 + 2 | 20 | 0 + 3 | 15 | 2 + 4 | 25 | 0 + 5 | 5 | 3 + 6 | 30 | 0 + 7 | 35 | 0 + 8 | 20 | 3 + 9 | 40 | 0 + 10 | 45 | 0 +(10 rows) + +-- ============================================================ +-- B10. RPR + Correlated subquery in WHERE +-- ============================================================ +-- Verify that an RPR window placed inside a correlated scalar +-- subquery is executed once per outer row. DEFINE still references +-- only local columns (qualified refs from the outer query are not +-- supported in DEFINE); the correlation lives in the subquery's +-- WHERE clause as "i.id <= o.id". The plan must show a SubPlan +-- attached to the outer scan, with the RPR WindowAgg driven by a +-- per-row scan filter carrying the correlation predicate. +-- Plan: SubPlan attached to the outer Seq Scan; the inner scan +-- carries "Filter: (id <= o.id)", confirming the correlation is +-- evaluated per outer row. 
+EXPLAIN (COSTS OFF) +SELECT o.id, o.val, + (SELECT count(*) OVER w + FROM rpr_integ i + WHERE i.id <= o.id + WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) + ORDER BY id + LIMIT 1) AS first_cnt +FROM rpr_integ o +ORDER BY o.id; + QUERY PLAN +----------------------------------------------------------------------------------------------------- + Sort + Sort Key: o.id + -> Seq Scan on rpr_integ o + SubPlan expr_1 + -> Limit + -> WindowAgg + Window: w AS (ORDER BY i.id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b+ + Nav Mark Lookback: 1 + -> Sort + Sort Key: i.id + -> Seq Scan on rpr_integ i + Filter: (id <= o.id) +(13 rows) + +-- Result: each outer row receives the first_cnt from its own +-- correlated RPR subquery. +SELECT o.id, o.val, + (SELECT count(*) OVER w + FROM rpr_integ i + WHERE i.id <= o.id + WINDOW w AS (ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B+) + DEFINE B AS val > PREV(val)) + ORDER BY id + LIMIT 1) AS first_cnt +FROM rpr_integ o +ORDER BY o.id; + id | val | first_cnt +----+-----+----------- + 1 | 10 | 0 + 2 | 20 | 2 + 3 | 15 | 2 + 4 | 25 | 2 + 5 | 5 | 2 + 6 | 30 | 2 + 7 | 35 | 2 + 8 | 20 | 2 + 9 | 40 | 2 + 10 | 45 | 2 +(10 rows) + +-- Cleanup +DROP TABLE rpr_integ; +DROP TABLE rpr_integ2; diff --git a/src/test/regress/expected/rpr_nfa.out b/src/test/regress/expected/rpr_nfa.out new file mode 100644 index 00000000000..a19b26c3b94 --- /dev/null +++ b/src/test/regress/expected/rpr_nfa.out @@ -0,0 +1,4661 @@ +-- ============================================================ +-- RPR NFA Tests +-- Tests for Row Pattern Recognition NFA Runtime Execution +-- ============================================================ +-- +-- This test suite validates the NFA (Non-deterministic Finite +-- Automaton) runtime execution engine in nodeWindowAgg.c, +-- focusing on update_reduced_frame and related functions. 
+-- +-- Test Strategy: +-- Diagonal pattern style using ARRAY flags to explicitly +-- control which pattern variables match at each row. +-- +-- Test Coverage: +-- Basic NFA Flow (match->absorb->advance) +-- Absorption Optimization +-- Context Lifecycle Management +-- Advance Phase (Epsilon Transitions) +-- Match Phase (Variable Matching) +-- Frame Boundary Handling +-- State Management (Deduplication) +-- Statistics and Diagnostics +-- Quantifier Runtime Behavior +-- Pathological Pattern Protection +-- Alternation Runtime Behavior +-- Deep Nested Groups +-- SKIP Options (Runtime) +-- INITIAL Mode (Runtime) +-- Frame Boundary Variations +-- Special Partition Cases +-- DEFINE Special Cases +-- Absorption Dynamic Flags +-- Zero-Consumption Cycle Detection +-- +-- Responsibility: +-- - NFA runtime execution paths +-- - Context/State lifecycle management +-- - Runtime boundary conditions and protections +-- +-- NOT tested here (covered in other files): +-- - Pattern parsing/optimization (rpr_base.sql) +-- - EXPLAIN output (rpr_explain.sql) +-- - PREV/NEXT semantics (rpr.sql) +-- ============================================================ +-- ============================================================ +-- Basic NFA Flow +-- ============================================================ +-- Simple sequential pattern +WITH test_sequential AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['_']) -- No match + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_sequential +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {B} | | + 3 | {C} | | 
+ 4 | {D} | | + 5 | {_} | | +(5 rows) + +-- Quantified pattern (A+ B+ C+) +WITH test_quantified AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['B']), + (6, ARRAY['C']), + (7, ARRAY['C']), + (8, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_quantified +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B+ C+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 7 + 2 | {A} | 2 | 7 + 3 | {A} | 3 | 7 + 4 | {B} | | + 5 | {B} | | + 6 | {C} | | + 7 | {C} | | + 8 | {_} | | +(8 rows) + +-- Optional pattern (A B? C) +WITH test_optional AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['C']), -- B skipped + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['C']), -- B matched + (6, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_optional +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B? 
C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {C} | | + 3 | {A} | 3 | 5 + 4 | {B} | | + 5 | {C} | | + 6 | {_} | | +(6 rows) + +-- Alternation pattern (A (B|C) D) +WITH test_alternation AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), -- First branch + (3, ARRAY['D']), + (4, ARRAY['A']), + (5, ARRAY['C']), -- Second branch + (6, ARRAY['D']), + (7, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alternation +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A (B | C) D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {B} | | + 3 | {D} | | + 4 | {A} | 4 | 6 + 5 | {C} | | + 6 | {D} | | + 7 | {_} | | +(7 rows) + +-- ============================================================ +-- Absorption Optimization +-- ============================================================ +-- Absorbable pattern (A+) +WITH test_absorbable AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_absorbable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {A} | 4 | 4 + 5 | {_} | | +(5 rows) + +-- Mixed absorbable/non-absorbable ((A+) | B) +WITH 
test_mixed_absorption AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_mixed_absorption +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A+) | B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | 2 | 3 + 3 | {A} | 3 | 3 + 4 | {B} | 4 | 4 + 5 | {_} | | +(5 rows) + +-- State coverage (same elemIdx, different count) +WITH test_state_coverage AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_state_coverage +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{2,} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | | + 4 | {B} | | + 5 | {_} | | +(5 rows) + +-- Reluctant pattern (A+?) - not absorbable +-- Compare with greedy A+ above: reluctant excluded from absorption. +-- Each context produces minimum match independently. +WITH test_reluctant_absorption AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_absorption +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+?) 
+ DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 1 + 2 | {A} | 2 | 2 + 3 | {A} | 3 | 3 + 4 | {A} | 4 | 4 + 5 | {_} | | +(5 rows) + +-- Absorption with fixed suffix: A+ B +WITH test_absorb_suffix AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_suffix +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {B} | | + 5 | {X} | | +(5 rows) + +-- Per-branch absorption with ALT: B+ C | B+ D +WITH test_absorb_alt AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['D']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_alt +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (B+ C | B+ D) + DEFINE + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B} | 1 | 4 + 2 | {B} | 2 | 4 + 3 | {B} | 3 | 4 + 4 | {D} | | + 5 | {X} | | +(5 rows) + +-- Non-absorbable: A B+ (unbounded not in first position) +WITH test_no_absorb AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_no_absorb +WINDOW w AS ( + ORDER BY id 
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {B} | | + 3 | {B} | | + 4 | {B} | | + 5 | {X} | | +(5 rows) + +-- GROUP merge enables absorption: (A B) (A B)+ optimized to (A B){2,} +WITH test_absorb_group AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_group +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A B) (A B)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 6 + 2 | {B} | | + 3 | {A} | 3 | 6 + 4 | {B} | | + 5 | {A} | | + 6 | {B} | | + 7 | {X} | | +(7 rows) + +-- Fixed-length group absorption: (A B{2})+ C +-- B{2} has min == max, equivalent to unrolling to (A B B)+ C +WITH test_absorb_fixedlen AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['A']), + (5, ARRAY['B']), + (6, ARRAY['B']), + (7, ARRAY['A']), + (8, ARRAY['B']), + (9, ARRAY['B']), + (10, ARRAY['C']), + (11, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_fixedlen +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B{2})+ C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 10 + 2 | {B} | | + 3 | {B} | | + 4 | {A} | 
| + 5 | {B} | | + 6 | {B} | | + 7 | {A} | | + 8 | {B} | | + 9 | {B} | | + 10 | {C} | | + 11 | {X} | | +(11 rows) + +-- Consecutive vars merged to fixed-length: (A B B)+ -> (A B{2})+ +-- mergeConsecutiveVars produces B{2}; now absorbable with fixed-length check +WITH test_absorb_consecutive AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['A']), + (5, ARRAY['B']), + (6, ARRAY['B']), + (7, ARRAY['A']), + (8, ARRAY['B']), + (9, ARRAY['B']), + (10, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_consecutive +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B B)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 9 + 2 | {B} | | + 3 | {B} | | + 4 | {A} | | + 5 | {B} | | + 6 | {B} | | + 7 | {A} | | + 8 | {B} | | + 9 | {B} | | + 10 | {X} | | +(10 rows) + +-- Nested fixed-length group absorption: (A (B C){2} D)+ E +-- Inner group {2} has min == max; absorbable via recursive check +-- step_size = 1 + (1+1)*2 + 1 = 6 +WITH test_absorb_nested_fixedlen AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['B']), + (5, ARRAY['C']), + (6, ARRAY['D']), + (7, ARRAY['A']), + (8, ARRAY['B']), + (9, ARRAY['C']), + (10, ARRAY['B']), + (11, ARRAY['C']), + (12, ARRAY['D']), + (13, ARRAY['E']), + (14, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_nested_fixedlen +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A (B C){2} D)+ E) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = 
ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 13 + 2 | {B} | | + 3 | {C} | | + 4 | {B} | | + 5 | {C} | | + 6 | {D} | | + 7 | {A} | | + 8 | {B} | | + 9 | {C} | | + 10 | {B} | | + 11 | {C} | | + 12 | {D} | | + 13 | {E} | | + 14 | {X} | | +(14 rows) + +-- Doubly nested fixed-length group absorption: (A ((B C{3}){2} D){2} E)+ F +-- step_size = 1 + ((1+3)*2+1)*2 + 1 = 20; 2 iterations + F = 41 rows +WITH test_absorb_doubly_nested AS ( + SELECT v AS id, ARRAY[ + CASE + WHEN v % 41 IN (1, 21) THEN 'A' + WHEN v % 41 IN (2, 6, 11, 15, 22, 26, 31, 35) THEN 'B' + WHEN v % 41 IN (3,4,5, 7,8,9, 12,13,14, 16,17,18, + 23,24,25, 27,28,29, 32,33,34, 36,37,38) THEN 'C' + WHEN v % 41 IN (10, 19, 30, 39) THEN 'D' + WHEN v % 41 IN (20, 40) THEN 'E' + WHEN v % 41 = 0 THEN 'F' + ELSE 'X' + END + ] AS flags + FROM generate_series(1, 82) AS s(v) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_doubly_nested +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A ((B C C C){2} D){2} E)+ F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 41 + 2 | {B} | | + 3 | {C} | | + 4 | {C} | | + 5 | {C} | | + 6 | {B} | | + 7 | {C} | | + 8 | {C} | | + 9 | {C} | | + 10 | {D} | | + 11 | {B} | | + 12 | {C} | | + 13 | {C} | | + 14 | {C} | | + 15 | {B} | | + 16 | {C} | | + 17 | {C} | | + 18 | {C} | | + 19 | {D} | | + 20 | {E} | | + 21 | {A} | | + 22 | {B} | | + 23 | {C} | | + 24 | {C} | | + 25 | {C} | | + 26 | {B} | | + 27 | {C} | | + 28 | {C} | | + 29 | {C} | | + 30 | {D} | | + 31 | {B} | | + 32 | {C} | | + 33 | {C} | | + 34 | {C} | | + 35 | {B} | | + 36 | {C} | | + 37 | {C} | | + 38 | {C} | | + 39 | {D} | 
| + 40 | {E} | | + 41 | {F} | | + 42 | {A} | 42 | 82 + 43 | {B} | | + 44 | {C} | | + 45 | {C} | | + 46 | {C} | | + 47 | {B} | | + 48 | {C} | | + 49 | {C} | | + 50 | {C} | | + 51 | {D} | | + 52 | {B} | | + 53 | {C} | | + 54 | {C} | | + 55 | {C} | | + 56 | {B} | | + 57 | {C} | | + 58 | {C} | | + 59 | {C} | | + 60 | {D} | | + 61 | {E} | | + 62 | {A} | | + 63 | {B} | | + 64 | {C} | | + 65 | {C} | | + 66 | {C} | | + 67 | {B} | | + 68 | {C} | | + 69 | {C} | | + 70 | {C} | | + 71 | {D} | | + 72 | {B} | | + 73 | {C} | | + 74 | {C} | | + 75 | {C} | | + 76 | {B} | | + 77 | {C} | | + 78 | {C} | | + 79 | {C} | | + 80 | {D} | | + 81 | {E} | | + 82 | {F} | | +(82 rows) + +-- 3-level END chain: ((A (B C){2}){2})+ +-- Tests END(BC{2}) -> END(A..{2}) -> END(+) chaining +-- 2 iterations of +, each 10 rows: (A B C B C)(A B C B C) +WITH test_absorb_3level_end AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), -- 1st + iter, 1st {2}, A + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['B']), + (5, ARRAY['C']), -- 1st (BC){2} done + (6, ARRAY['A']), -- 1st + iter, 2nd {2}, A + (7, ARRAY['B']), + (8, ARRAY['C']), + (9, ARRAY['B']), + (10, ARRAY['C']), -- 2nd (BC){2} done, 1st {2} done, 1st + iter done + (11, ARRAY['A']), -- 2nd + iter, 1st {2}, A + (12, ARRAY['B']), + (13, ARRAY['C']), + (14, ARRAY['B']), + (15, ARRAY['C']), + (16, ARRAY['A']), -- 2nd + iter, 2nd {2}, A + (17, ARRAY['B']), + (18, ARRAY['C']), + (19, ARRAY['B']), + (20, ARRAY['C']), -- 2nd + iter done + (21, ARRAY['X']) -- no match, + ends + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_3level_end +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A (B C){2}){2})+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 20 + 2 | {B} | | + 3 | 
{C} | | + 4 | {B} | | + 5 | {C} | | + 6 | {A} | | + 7 | {B} | | + 8 | {C} | | + 9 | {B} | | + 10 | {C} | | + 11 | {A} | | + 12 | {B} | | + 13 | {C} | | + 14 | {B} | | + 15 | {C} | | + 16 | {A} | | + 17 | {B} | | + 18 | {C} | | + 19 | {B} | | + 20 | {C} | | + 21 | {X} | | +(21 rows) + +-- Multiple unbounded: A+ B+ (first element unbounded enables absorption) +WITH test_multi_unbounded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_multi_unbounded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {B} | | + 4 | {B} | | + 5 | {X} | | +(5 rows) + +-- ============================================================ +-- Context Lifecycle +-- ============================================================ +-- Multiple overlapping contexts (SKIP TO NEXT ROW) +WITH test_overlapping_contexts AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_overlapping_contexts +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {B} | | + 5 | {_} | | +(5 rows) + +-- Failed context cleanup (early failure) +WITH test_context_cleanup AS ( + SELECT * FROM (VALUES + (1, 
ARRAY['_']), -- Pruned at first row + (2, ARRAY['A']), + (3, ARRAY['_']), -- Mismatched after row 2 + (4, ARRAY['A']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_context_cleanup +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {_} | | + 2 | {A} | | + 3 | {_} | | + 4 | {A} | 4 | 5 + 5 | {B} | | +(5 rows) + +-- Partition end (incomplete contexts) +WITH test_partition_end AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']) + -- Pattern requires B, but partition ends + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_partition_end +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | | + 3 | {A} | | +(3 rows) + +-- Completed context encountered during processing +-- Pattern (A | B C D): Ctx1 takes long B->C->D path, while Ctx2 takes +-- short A path and completes first. Next row sees Ctx2 +-- with states=NULL and skips it. 
+WITH test_completed_ctx AS ( + SELECT * FROM (VALUES + (1, ARRAY['B', '_']), + (2, ARRAY['C', 'A']), + (3, ARRAY['D', '_']), + (4, ARRAY['_', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_completed_ctx +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A | B C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B,_} | 1 | 3 + 2 | {C,A} | 2 | 2 + 3 | {D,_} | | + 4 | {_,_} | | +(4 rows) + +-- Reluctant context lifecycle (A+? B with SKIP TO NEXT ROW) +-- A+? exits early but if B not available, falls back to loop. +-- Contexts not absorbed (reluctant), so multiple survive. +WITH test_reluctant_context AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_context +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+? 
B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | 2 | 3 + 3 | {B} | | + 4 | {_} | | +(4 rows) + +-- ============================================================ +-- Advance Phase (Epsilon Transitions) +-- ============================================================ +-- Nested groups ((A B)+) +WITH test_nested_groups AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_groups +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A B)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 6 + 2 | {B} | | + 3 | {A} | 3 | 6 + 4 | {B} | | + 5 | {A} | 5 | 6 + 6 | {B} | | + 7 | {_} | | +(7 rows) + +-- Multiple alternation branches (A (B|C|D) E) +WITH test_multi_alt AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['E']), + (4, ARRAY['A']), + (5, ARRAY['C']), + (6, ARRAY['E']), + (7, ARRAY['A']), + (8, ARRAY['D']), + (9, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_multi_alt +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A (B | C | D) E) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {B} | | + 3 | {E} | | + 4 | {A} | 4 | 6 + 5 | {C} | | + 6 | {E} | | + 
7 | {A} | 7 | 9 + 8 | {D} | | + 9 | {E} | | +(9 rows) + +-- Optional VAR at start (A? B C) +WITH test_optional_var AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), -- A skipped + (2, ARRAY['C']), + (3, ARRAY['A']), -- A matched + (4, ARRAY['B']), + (5, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_optional_var +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A? B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B} | 1 | 2 + 2 | {C} | | + 3 | {A} | 3 | 5 + 4 | {B} | 4 | 5 + 5 | {C} | | +(5 rows) + +-- Nested alternation ((A|B) (C|D)) +WITH test_nested_alt AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['C']), -- A C + (3, ARRAY['A']), + (4, ARRAY['D']), -- A D + (5, ARRAY['B']), + (6, ARRAY['C']), -- B C + (7, ARRAY['B']), + (8, ARRAY['D']) -- B D + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_alt +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A | B) (C | D)) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {C} | | + 3 | {A} | 3 | 4 + 4 | {D} | | + 5 | {B} | 5 | 6 + 6 | {C} | | + 7 | {B} | 7 | 8 + 8 | {D} | | +(8 rows) + +-- Mixed greedy/reluctant sequence: A+? 
B+ (reluctant A, greedy B) +-- A exits as early as possible, B consumes the rest greedily +WITH test_mixed_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A','B']), + (4, ARRAY['B']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_mixed_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+? B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {A} | | + 3 | {A,B} | | + 4 | {B} | | + 5 | {B} | | +(5 rows) + +-- Optional reluctant group: (A B)?? C +-- nfa_advance_begin: reluctant tries skip first, but skip path needs C +-- at row 1 which is A -> skip fails. Enter path succeeds: A(1) B(2) C(3). +WITH test_optional_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_optional_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)?? C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {B} | | + 3 | {C} | | +(3 rows) + +-- Greedy/reluctant sequence: A+ B+? (greedy A, reluctant B at end) +-- A consumes greedily, B+? 
exits to FIN after minimum match +WITH test_greedy_then_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A','B']), + (3, ARRAY['B']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_greedy_then_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+?) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A,B} | | + 3 | {B} | | + 4 | {B} | | +(4 rows) + +-- Reluctant optional group skip-to-FIN +-- When a reluctant optional group's skip path reaches FIN, the group +-- entry path is abandoned (nodeWindowAgg.c nfa_advance_begin). +-- Pattern: C (A B)?? -- after C matches, the reluctant group (A B)?? +-- prefers to skip. Skip goes to FIN (group is last element), so +-- the match completes with just C. +WITH test_begin_skip_fin AS ( + SELECT * FROM (VALUES + (1, ARRAY['C']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['C','A']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_begin_skip_fin +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (C (A B)??) 
+ DEFINE + C AS 'C' = ANY(flags), + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {C} | 1 | 1 + 2 | {A} | | + 3 | {B} | | + 4 | {C,A} | 4 | 4 + 5 | {B} | | +(5 rows) + +-- ============================================================ +-- Match Phase +-- ============================================================ +-- Simple VAR with END next (A B C all min=max=1) +WITH test_simple_var AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_simple_var +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {B} | | + 3 | {C} | | + 4 | {_} | | +(4 rows) + +-- VAR max exceeded (A{2,3}) +WITH test_max_exceeded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), -- Max = 3 + (4, ARRAY['A']), -- Exceeds max, state removed + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_max_exceeded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{2,3} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | 2 | 5 + 3 | {A} | 3 | 5 + 4 | {A} | | + 5 | {B} | | +(5 rows) + +-- Non-matching VAR (DEFINE false) +WITH test_non_matching AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['_']), -- B not matched (DEFINE false) + (3, ARRAY['A']), + (4, 
ARRAY['B']), -- B matched + (5, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_non_matching +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {_} | | + 3 | {A} | 3 | 5 + 4 | {B} | | + 5 | {C} | | +(5 rows) + +-- ============================================================ +-- Frame Boundary Handling +-- ============================================================ +-- Limited frame (ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING) +WITH test_limited_frame AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), -- Within 3 FOLLOWING + (5, ARRAY['B']), -- Beyond 3 FOLLOWING from row 1 + (6, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_limited_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {B} | | + 5 | {B} | | + 6 | {_} | | +(6 rows) + +-- Unbounded frame (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +WITH test_unbounded_frame AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']), + (6, ARRAY['B']) -- Far from start, but unbounded + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_unbounded_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW 
AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 6 + 2 | {A} | 2 | 6 + 3 | {A} | 3 | 6 + 4 | {A} | 4 | 6 + 5 | {A} | 5 | 6 + 6 | {B} | | +(6 rows) + +-- Match exceeds frame boundary +WITH test_frame_exceeded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']) + -- Frame ends at row 3 (2 FOLLOWING), B never appears + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_frame_exceeded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | | + 3 | {A} | | +(3 rows) + +-- Frame boundary forced mismatch +-- Limited frame with enough rows so that a context's frame boundary +-- is exceeded while still processing. +WITH test_frame_boundary AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']), + (6, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_frame_boundary +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | | + 3 | {A} | | + 4 | {A} | 4 | 6 + 5 | {A} | 5 | 6 + 6 | {B} | | +(6 rows) + +-- Reluctant with limited frame (A+? 
B with 2 FOLLOWING) +-- Reluctant exits early, B must be within frame boundary +WITH test_reluctant_frame AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+? B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | 2 | 3 + 3 | {B} | | + 4 | {_} | | +(4 rows) + +-- ============================================================ +-- State Management +-- ============================================================ +-- Duplicate state creation +WITH test_duplicate_states AS ( + SELECT * FROM (VALUES + (1, ARRAY['A', 'B']), -- Both A and B match (creates duplicate states via different paths) + (2, ARRAY['C', '_']), + (3, ARRAY['D', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_duplicate_states +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A | B) C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 3 + 2 | {C,_} | | + 3 | {D,_} | | +(3 rows) + +-- Reluctant duplicate state handling +-- (A+? | B+?) 
creates exit and loop states; exit paths may converge +WITH test_reluctant_dedup AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), + (2, ARRAY['A','B']), + (3, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_dedup +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A+? | B+?)) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 1 + 2 | {A,B} | 2 | 2 + 3 | {_} | | +(3 rows) + +-- Large pattern (stress free list) +WITH test_large_pattern AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']), + (6, ARRAY['F']), + (7, ARRAY['G']), + (8, ARRAY['H']), + (9, ARRAY['I']), + (10, ARRAY['J']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_large_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D E F G H I J) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags), + G AS 'G' = ANY(flags), + H AS 'H' = ANY(flags), + I AS 'I' = ANY(flags), + J AS 'J' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 10 + 2 | {B} | | + 3 | {C} | | + 4 | {D} | | + 5 | {E} | | + 6 | {F} | | + 7 | {G} | | + 8 | {H} | | + 9 | {I} | | + 10 | {J} | | +(10 rows) + +-- Reduced frame map reallocation (> 1024 rows) +WITH test_map_realloc AS ( + SELECT id, CASE WHEN id % 2 = 1 THEN ARRAY['A'] ELSE ARRAY['B'] END AS flags + FROM generate_series(1, 1100) AS id +) +SELECT count(*), min(match_start), max(match_end) +FROM ( + SELECT 
id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end + FROM test_map_realloc + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) + ) +) sub; + count | min | max +-------+-----+------ + 1100 | 1 | 1100 +(1 row) + +-- ============================================================ +-- Statistics and Diagnostics +-- ============================================================ +-- Matched contexts +WITH test_matched AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_matched +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {B} | | + 3 | {A} | 3 | 4 + 4 | {B} | | +(4 rows) + +-- Pruned contexts (failed at first row) +WITH test_pruned AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), -- Pruned + (2, ARRAY['_']), -- Pruned + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_pruned +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {_} | | + 2 | {_} | | + 3 | {A} | 3 | 4 + 4 | {B} | | +(4 rows) + +-- Mismatched contexts (failed after multiple rows) +WITH test_mismatched AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['_']), -- 
Mismatched after 2 rows + (4, ARRAY['A']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_mismatched +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | | + 3 | {_} | | + 4 | {A} | 4 | 5 + 5 | {B} | | +(5 rows) + +-- Reluctant not absorbed (A+? with SKIP TO NEXT ROW) +-- Compare with greedy A+ below: reluctant is not absorbable, +-- so all contexts survive independently. +WITH test_reluctant_stats AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_stats +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+?) 
+ DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 1 + 2 | {A} | 2 | 2 + 3 | {A} | 3 | 3 + 4 | {A} | 4 | 4 + 5 | {_} | | +(5 rows) + +-- Absorbed contexts +WITH test_absorbed AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_absorbed +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {A} | 4 | 4 + 5 | {_} | | +(5 rows) + +-- Skipped contexts (SKIP TO NEXT ROW) +WITH test_skipped AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']) -- Completes match starting at row 1 + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skipped +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {B} | | +(4 rows) + +-- ============================================================ +-- Quantifier Runtime Behavior +-- ============================================================ +-- Large count handling (A{100}) +WITH test_large_count AS ( + SELECT i AS id, ARRAY['A'] AS flags + FROM generate_series(1, 105) i +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_large_count +WINDOW w AS ( + ORDER BY id + 
ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{100}) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +-----+-------+-------------+----------- + 1 | {A} | 1 | 100 + 2 | {A} | 2 | 101 + 3 | {A} | 3 | 102 + 4 | {A} | 4 | 103 + 5 | {A} | 5 | 104 + 6 | {A} | 6 | 105 + 7 | {A} | | + 8 | {A} | | + 9 | {A} | | + 10 | {A} | | + 11 | {A} | | + 12 | {A} | | + 13 | {A} | | + 14 | {A} | | + 15 | {A} | | + 16 | {A} | | + 17 | {A} | | + 18 | {A} | | + 19 | {A} | | + 20 | {A} | | + 21 | {A} | | + 22 | {A} | | + 23 | {A} | | + 24 | {A} | | + 25 | {A} | | + 26 | {A} | | + 27 | {A} | | + 28 | {A} | | + 29 | {A} | | + 30 | {A} | | + 31 | {A} | | + 32 | {A} | | + 33 | {A} | | + 34 | {A} | | + 35 | {A} | | + 36 | {A} | | + 37 | {A} | | + 38 | {A} | | + 39 | {A} | | + 40 | {A} | | + 41 | {A} | | + 42 | {A} | | + 43 | {A} | | + 44 | {A} | | + 45 | {A} | | + 46 | {A} | | + 47 | {A} | | + 48 | {A} | | + 49 | {A} | | + 50 | {A} | | + 51 | {A} | | + 52 | {A} | | + 53 | {A} | | + 54 | {A} | | + 55 | {A} | | + 56 | {A} | | + 57 | {A} | | + 58 | {A} | | + 59 | {A} | | + 60 | {A} | | + 61 | {A} | | + 62 | {A} | | + 63 | {A} | | + 64 | {A} | | + 65 | {A} | | + 66 | {A} | | + 67 | {A} | | + 68 | {A} | | + 69 | {A} | | + 70 | {A} | | + 71 | {A} | | + 72 | {A} | | + 73 | {A} | | + 74 | {A} | | + 75 | {A} | | + 76 | {A} | | + 77 | {A} | | + 78 | {A} | | + 79 | {A} | | + 80 | {A} | | + 81 | {A} | | + 82 | {A} | | + 83 | {A} | | + 84 | {A} | | + 85 | {A} | | + 86 | {A} | | + 87 | {A} | | + 88 | {A} | | + 89 | {A} | | + 90 | {A} | | + 91 | {A} | | + 92 | {A} | | + 93 | {A} | | + 94 | {A} | | + 95 | {A} | | + 96 | {A} | | + 97 | {A} | | + 98 | {A} | | + 99 | {A} | | + 100 | {A} | | + 101 | {A} | | + 102 | {A} | | + 103 | {A} | | + 104 | {A} | | + 105 | {A} | | +(105 rows) + +-- Unlimited quantifier (A{10,}) +WITH test_unlimited AS ( + SELECT i AS id, ARRAY['A'] AS flags + FROM generate_series(1, 15) i + UNION ALL + SELECT 16, 
ARRAY['B'] +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_unlimited +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{10,} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 16 + 2 | {A} | 2 | 16 + 3 | {A} | 3 | 16 + 4 | {A} | 4 | 16 + 5 | {A} | 5 | 16 + 6 | {A} | 6 | 16 + 7 | {A} | | + 8 | {A} | | + 9 | {A} | | + 10 | {A} | | + 11 | {A} | | + 12 | {A} | | + 13 | {A} | | + 14 | {A} | | + 15 | {A} | | + 16 | {B} | | +(16 rows) + +-- Min boundary (A{3,5}) +WITH test_min_boundary AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), -- Min=3 reached, exit path available + (4, ARRAY['B']), -- Match ends at min + (5, ARRAY['A']), + (6, ARRAY['A']), + (7, ARRAY['A']), + (8, ARRAY['A']), + (9, ARRAY['A']), -- Count=5, max reached + (10, ARRAY['B']) -- Match ends at max + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_min_boundary +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{3,5} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | | + 3 | {A} | | + 4 | {B} | | + 5 | {A} | 5 | 10 + 6 | {A} | 6 | 10 + 7 | {A} | 7 | 10 + 8 | {A} | | + 9 | {A} | | + 10 | {B} | | +(10 rows) + +-- Max boundary exceeded (A{3,5}) +WITH test_max_boundary AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']), + (6, ARRAY['A']), -- Count=6 > max=5, row 1 context removed + (7, ARRAY['B']) -- Row 1 context: no match (exceeded max) + ) AS t(id, flags) +) +SELECT id, flags, + 
first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_max_boundary +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{3,5} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | 2 | 7 + 3 | {A} | 3 | 7 + 4 | {A} | 4 | 7 + 5 | {A} | | + 6 | {A} | | + 7 | {B} | | +(7 rows) + +-- Greedy vs reluctant: A+ matches all rows, A+? matches minimum +WITH test_greedy_vs_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','_']), + (2, ARRAY['A','_']), + (3, ARRAY['A','B']), + (4, ARRAY['B','_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_greedy_vs_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,_} | 1 | 4 + 2 | {A,_} | | + 3 | {A,B} | | + 4 | {B,_} | | +(4 rows) + +-- Same data, reluctant A+? exits at row 3 where B is first available +WITH test_greedy_vs_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','_']), + (2, ARRAY['A','_']), + (3, ARRAY['A','B']), + (4, ARRAY['B','_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_greedy_vs_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+? B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,_} | 1 | 3 + 2 | {A,_} | | + 3 | {A,B} | | + 4 | {B,_} | | +(4 rows) + +-- Reluctant group: (A B)+? 
matches minimum 1 iteration +WITH test_reluctant_group AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_group +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+?) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {B} | | + 3 | {A} | 3 | 4 + 4 | {B} | | +(4 rows) + +-- A+? B (reluctant plus): exits A at first B availability +-- (Same scenario as greedy-vs-reluctant comparison above; retained for +-- standalone quantifier coverage alongside A{1,3}? and A{2,3}? below) +WITH test_reluctant_plus AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','_']), + (2, ARRAY['A','_']), + (3, ARRAY['A','B']), + (4, ARRAY['B','_']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_reluctant_plus +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+? B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,_} | 1 | 3 + 2 | {A,_} | | + 3 | {A,B} | | + 4 | {B,_} | | +(4 rows) + +-- A{1,3}? B (reluctant bounded): same data, bounded quantifier +WITH test_reluctant_bounded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','_']), + (2, ARRAY['A','_']), + (3, ARRAY['A','B']), + (4, ARRAY['B','_']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_reluctant_bounded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{1,3}? 
B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,_} | 1 | 3 + 2 | {A,_} | | + 3 | {A,B} | | + 4 | {B,_} | | +(4 rows) + +-- ============================================================ +-- Pathological Pattern Runtime Protection +-- ============================================================ +-- Complex nested nullable ((A* B*)*) - Runtime protection +WITH test_complex_nested AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_complex_nested +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A* B*)*) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {B} | 3 | 4 + 4 | {B} | 4 | 4 + 5 | {C} | | +(5 rows) + +-- Nested nullable with quantifier ((A{0,3})*) +WITH test_nested_quantifier AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_quantifier +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A{0,3})*) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | 2 | 3 + 3 | {A} | 3 | 3 + 4 | {B} | | +(4 rows) + +-- Reluctant nullable: A*? (prefers 0 matches) +-- A*? 
always takes skip path (0 iterations preferred) +WITH test_reluctant_nullable AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_nullable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A*? B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | | + 3 | {A} | | + 4 | {_} | | +(4 rows) + +-- ============================================================ +-- Alternation Runtime Behavior +-- ============================================================ +-- Multi-branch alternation (A (B|C|D|E) F) +WITH test_multi_branch AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['F']), + (4, ARRAY['A']), + (5, ARRAY['C']), + (6, ARRAY['F']), + (7, ARRAY['A']), + (8, ARRAY['D']), + (9, ARRAY['F']), + (10, ARRAY['A']), + (11, ARRAY['E']), + (12, ARRAY['F']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_multi_branch +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A (B | C | D | E) F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {B} | | + 3 | {F} | | + 4 | {A} | 4 | 6 + 5 | {C} | | + 6 | {F} | | + 7 | {A} | 7 | 9 + 8 | {D} | | + 9 | {F} | | + 10 | {A} | 10 | 12 + 11 | {E} | | + 12 | {F} | | +(12 rows) + +-- Alternation with quantifiers (A+ | B+ | C+) +WITH test_alt_quantifiers AS ( + SELECT * FROM (VALUES + 
(1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['B']), + (6, ARRAY['C']), + (7, ARRAY['C']), + (8, ARRAY['C']), + (9, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_quantifiers +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ | B+ | C+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | 2 | 3 + 3 | {A} | 3 | 3 + 4 | {B} | 4 | 5 + 5 | {B} | 5 | 5 + 6 | {C} | 6 | 9 + 7 | {C} | 7 | 9 + 8 | {C} | 8 | 9 + 9 | {C} | 9 | 9 +(9 rows) + +-- altPriority replacement (A B C | D) +-- D branch (higher altPriority) matches first at row 1, +-- then A B C branch (lower altPriority) replaces it at row 3. +WITH test_alt_replace AS ( + SELECT * FROM (VALUES + (1, ARRAY['A', 'D']), + (2, ARRAY['B', '_']), + (3, ARRAY['C', '_']), + (4, ARRAY['_', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_replace +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C | D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,D} | 1 | 3 + 2 | {B,_} | | + 3 | {C,_} | | + 4 | {_,_} | | +(4 rows) + +-- ALT lexical order takes priority over greedy (longer match). +-- Row 1 matches both A and B; A wins by lexical order (match 1-1). 
+WITH test_alt_lexical_order AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), -- A and B both match + (2, ARRAY['_','C']) -- only C matches (would continue B C) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_lexical_order +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B C)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 1 + 2 | {_,C} | | +(2 rows) + +-- ALT with reluctant: (A+? | B+) - A branch is reluctant, B is greedy. +-- Row 1 matches both A and B. A+? exits immediately (match 1-1). +WITH test_alt_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), + (2, ARRAY['B','_']), + (3, ARRAY['B','_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A+? | B+)) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 1 + 2 | {B,_} | 2 | 3 + 3 | {B,_} | | +(3 rows) + +-- Optional first branch in ALT with quantifier: (A? | B){1,2} +-- First branch A? exit path may loop back to ALT and trigger cycle +-- detection during DFS. All branches must receive correct counts. +WITH test_alt_opt_first AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['B']), + (3, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_opt_first +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A? 
| B){1,2})) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B} | 1 | 2 + 2 | {B} | 2 | 3 + 3 | {B} | 3 | 3 +(3 rows) + +-- Mixed A/B rows across iterations of (A? | B){1,2} +WITH test_alt_opt_mixed AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A','B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_opt_mixed +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A? | B){1,2})) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {B} | 2 | 3 + 3 | {A,B} | 3 | 3 +(3 rows) + +-- Reluctant variant: (A?? | B){1,2} +WITH test_alt_opt_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['B']), + (3, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_opt_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A?? 
| B){1,2})) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B} | 1 | 2 + 2 | {B} | 2 | 3 + 3 | {B} | 3 | 3 +(3 rows) + +-- Overlapping match: A B C D E | B C D | C D E F (SKIP PAST LAST ROW) +WITH test_overlap1 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']), + (6, ARRAY['F']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap1 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E | B C D | C D E F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {B} | | + 3 | {C} | | + 4 | {D} | | + 5 | {E} | | + 6 | {F} | | +(6 rows) + +-- Same with SKIP TO NEXT ROW: three overlapping matches +WITH test_overlap1 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']), + (6, ARRAY['F']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap1 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D E | B C D | C D E F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {B} | 2 | 4 + 3 | {C} | 3 | 6 + 4 | {D} | | + 5 | {E} | | + 6 | {F} | | +(6 rows) + +-- Longer pattern fails, shorter survives: A+ B C D E | B+ 
C +WITH test_overlap1b AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap1b +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B C D E | B+ C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {B} | 2 | 3 + 3 | {C} | | + 4 | {D} | | + 5 | {X} | | +(5 rows) + +-- Long B sequence with different endings: A B+ C | B+ D +WITH test_overlap2 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['B']), + (6, ARRAY['C']), + (7, ARRAY['B']), + (8, ARRAY['B']), + (9, ARRAY['B']), + (10, ARRAY['D']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap2 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B+ C | B+ D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 6 + 2 | {B} | | + 3 | {B} | | + 4 | {B} | | + 5 | {B} | | + 6 | {C} | | + 7 | {B} | 7 | 10 + 8 | {B} | 8 | 10 + 9 | {B} | 9 | 10 + 10 | {D} | | +(10 rows) + +-- Greedy with late failure ("betrayal"): A B C+ D | A B +WITH test_betrayal AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['C']), + (5, ARRAY['C']), + (6, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end 
+FROM test_betrayal +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C+ D | A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {B} | | + 3 | {C} | | + 4 | {C} | | + 5 | {C} | | + 6 | {E} | | +(6 rows) + +-- Multiple TRUE per row: overlapping pattern variables +WITH test_multi_true AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), + (2, ARRAY['B','C']), + (3, ARRAY['C','D']), + (4, ARRAY['D','E']), + (5, ARRAY['E','_']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_multi_true +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 5 + 2 | {B,C} | | + 3 | {C,D} | | + 4 | {D,E} | | + 5 | {E,_} | | +(5 rows) + +-- Diagonal pattern with shifted multi-TRUE overlap +WITH test_diagonal AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','_']), + (2, ARRAY['B','A']), + (3, ARRAY['C','B']), + (4, ARRAY['D','C']), + (5, ARRAY['_','D']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_diagonal +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,_} | 1 | 4 + 2 | {B,A} | 2 | 5 + 3 | {C,B} | | + 4 | {D,C} 
| | + 5 | {_,D} | | +(5 rows) + +-- ((A | B) C)+ - alternation inside group with outer quantifier +WITH test_alt_group AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['C']), + (3, ARRAY['B']), + (4, ARRAY['C']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_alt_group +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A | B) C)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {C} | | + 3 | {B} | | + 4 | {C} | | + 5 | {X} | | +(5 rows) + +-- ============================================================ +-- Deep Nested Groups +-- ============================================================ +-- Three-level nesting ((((A B)+)+)+) +WITH test_deep_nesting AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_deep_nesting +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((((A B)+)+)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 6 + 2 | {B} | | + 3 | {A} | 3 | 6 + 4 | {B} | | + 5 | {A} | 5 | 6 + 6 | {B} | | + 7 | {_} | | +(7 rows) + +-- Multiple groups in nesting (((A B) (C D))+) +WITH test_nested_sequential AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['C']), + (8, ARRAY['D']), + (9, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, 
flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_sequential +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A B) (C D))+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 8 + 2 | {B} | | + 3 | {C} | | + 4 | {D} | | + 5 | {A} | 5 | 8 + 6 | {B} | | + 7 | {C} | | + 8 | {D} | | + 9 | {_} | | +(9 rows) + +-- Nested END->END max reached +-- Inner group (A B){2} reaches max=2 -> exits to outer END +WITH test_end_nested_max AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['A']), + (8, ARRAY['B']), + (9, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_end_nested_max +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A B){2})+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 8 + 2 | {B} | | + 3 | {A} | 3 | 6 + 4 | {B} | | + 5 | {A} | 5 | 8 + 6 | {B} | | + 7 | {A} | | + 8 | {B} | | + 9 | {_} | | +(9 rows) + +-- Nested END->END between min/max +-- Inner group (A B){1,3} exits between min/max -> outer END count++ +WITH test_end_nested_mid AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['A']), + (8, ARRAY['B']), + (9, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_end_nested_mid +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN 
CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A B){1,3})+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 8 + 2 | {B} | | + 3 | {A} | 3 | 8 + 4 | {B} | | + 5 | {A} | 5 | 8 + 6 | {B} | | + 7 | {A} | 7 | 8 + 8 | {B} | | + 9 | {_} | | +(9 rows) + +-- Nested reluctant group ((A B)+?) with following element C +-- Inner group exits after minimum 1 iteration +WITH test_nested_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A B)+? C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {B} | | + 3 | {A} | 3 | 5 + 4 | {B} | | + 5 | {C} | | +(5 rows) + +-- (A B){2} - group with exact quantifier +WITH test_group_exact AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_group_exact +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B){2}) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {B} | | + 3 | {A} | | + 4 | {B} | | + 5 | {X} | | +(5 rows) + +-- Nested END->END fast-forward +-- When an inner group has a nullable body and count < min, the +-- fast-forward path 
exits through the outer END, incrementing +-- the outer group's count (nodeWindowAgg.c nfa_advance_end). +-- Pattern: ((A?){2,3}){2,3} -- nested groups, neither collapses +-- because the optimizer cannot safely multiply non-exact quantifiers. +-- Data has no A rows, forcing all-empty iterations via fast-forward. +WITH test_nested_ff AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['B']), + (3, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_ff +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A?){2,3}){2,3}) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B} | | + 2 | {B} | | + 3 | {B} | | +(3 rows) + +-- ============================================================ +-- SKIP Options (Runtime) +-- ============================================================ +-- SKIP PAST LAST ROW (non-overlapping matches) +WITH test_skip_past AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skip_past +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | | + 3 | {A} | | + 4 | {A} | | + 5 | {_} | | +(5 rows) + +-- SKIP TO NEXT ROW (overlapping matches) +WITH test_skip_next AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end 
+FROM test_skip_next +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {A} | 4 | 4 + 5 | {_} | | +(5 rows) + +-- SKIP difference verification +WITH test_skip_diff AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT 'SKIP PAST' AS mode, id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skip_diff +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +) +UNION ALL +SELECT 'SKIP NEXT' AS mode, id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skip_diff +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +) +ORDER BY mode, id; + mode | id | flags | match_start | match_end +-----------+----+-------+-------------+----------- + SKIP NEXT | 1 | {A} | 1 | 2 + SKIP NEXT | 2 | {B} | | + SKIP NEXT | 3 | {A} | 3 | 4 + SKIP NEXT | 4 | {B} | | + SKIP PAST | 1 | {A} | 1 | 2 + SKIP PAST | 2 | {B} | | + SKIP PAST | 3 | {A} | 3 | 4 + SKIP PAST | 4 | {B} | | +(8 rows) + +-- Reluctant SKIP comparison: A+? 
with SKIP PAST vs SKIP NEXT +WITH test_reluctant_skip AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT 'SKIP PAST' AS mode, id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_skip +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+?) + DEFINE + A AS 'A' = ANY(flags) +) +UNION ALL +SELECT 'SKIP NEXT' AS mode, id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_skip +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+?) + DEFINE + A AS 'A' = ANY(flags) +) +ORDER BY mode, id; + mode | id | flags | match_start | match_end +-----------+----+-------+-------------+----------- + SKIP NEXT | 1 | {A} | 1 | 1 + SKIP NEXT | 2 | {A} | 2 | 2 + SKIP NEXT | 3 | {A} | 3 | 3 + SKIP NEXT | 4 | {_} | | + SKIP PAST | 1 | {A} | 1 | 1 + SKIP PAST | 2 | {A} | 2 | 2 + SKIP PAST | 3 | {A} | 3 | 3 + SKIP PAST | 4 | {_} | | +(8 rows) + +-- ============================================================ +-- INITIAL Mode (Runtime) +-- Placeholder: INITIAL is not yet implemented (syntax error). +-- Kept here so tests convert to runtime tests when implemented. 
+-- ============================================================ +-- INITIAL mode (not yet supported - produces syntax error) +WITH test_initial_mode AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), -- Unmatched + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['_']), -- Unmatched + (5, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_initial_mode +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); +ERROR: syntax error at or near "AFTER" +LINE 18: AFTER MATCH SKIP TO NEXT ROW + ^ +-- Default mode (include all rows) +WITH test_default_mode AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), -- Unmatched, but included + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['_']), -- Unmatched, but included + (5, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_default_mode +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {_} | | + 2 | {A} | 2 | 3 + 3 | {A} | 3 | 3 + 4 | {_} | | + 5 | {A} | 5 | 5 +(5 rows) + +-- Mode difference verification (INITIAL not yet supported - produces syntax error) +WITH test_mode_diff AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), + (2, ARRAY['A']), + (3, ARRAY['_']) + ) AS t(id, flags) +) +SELECT 'INITIAL' AS mode, COUNT(*) AS row_count +FROM ( + SELECT id FROM test_mode_diff + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE A AS 'A' = ANY(flags) + ) +) sub +UNION ALL +SELECT 'DEFAULT' AS mode, COUNT(*) AS row_count +FROM ( + SELECT id FROM test_mode_diff + 
WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE A AS 'A' = ANY(flags) + ) +) sub +ORDER BY mode; +ERROR: syntax error at or near "AFTER" +LINE 15: AFTER MATCH SKIP TO NEXT ROW + ^ +-- ============================================================ +-- Frame Boundary Variations +-- ============================================================ +-- Very limited frame (1 FOLLOWING) +WITH test_one_following AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), -- Within 1 FOLLOWING + (3, ARRAY['A']), -- Beyond 1 FOLLOWING from row 1 + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_one_following +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {B} | | + 3 | {A} | 3 | 4 + 4 | {B} | | +(4 rows) + +-- Medium frame (10 FOLLOWING) +WITH test_ten_following AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']), + (6, ARRAY['A']), + (7, ARRAY['A']), + (8, ARRAY['A']), + (9, ARRAY['A']), + (10, ARRAY['A']), + (11, ARRAY['B']), -- Within 10 FOLLOWING from row 1 + (12, ARRAY['A']), + (13, ARRAY['B']) -- Beyond 10 FOLLOWING from row 1 + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_ten_following +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 10 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 11 + 2 | {A} | 2 | 11 + 3 | {A} | 3 | 11 + 4 
| {A} | 4 | 11 + 5 | {A} | 5 | 11 + 6 | {A} | 6 | 11 + 7 | {A} | 7 | 11 + 8 | {A} | 8 | 11 + 9 | {A} | 9 | 11 + 10 | {A} | 10 | 11 + 11 | {B} | | + 12 | {A} | 12 | 13 + 13 | {B} | | +(13 rows) + +-- Exact boundary match +WITH test_exact_boundary AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['B']) -- Exactly at 4 FOLLOWING (frame end) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_exact_boundary +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {A} | 2 | 5 + 3 | {A} | 3 | 5 + 4 | {A} | 4 | 5 + 5 | {B} | | +(5 rows) + +-- N FOLLOWING + SKIP TO NEXT ROW: overlapping matches bounded by frame +-- Row 1: frame [1,4], A(1-3) B(4) -> match +-- Row 2: frame [2,5], A(2-3) B(4) -> match +-- Row 3: frame [3,6], A(3) B(4) -> match +-- Row 5: frame [5,6], A(5) B(6) -> match +WITH test_n_skip_next AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_n_skip_next +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {B} | | + 5 | {A} | 5 | 6 + 6 | {B} | | +(6 rows) + +-- Frame exactly 1 row short of potential match +-- From row 1: A A A B needs 4 rows but frame holds 3 -> no match +-- From row 2: A A B fits in 3-row frame 
-> match +WITH test_frame_one_short AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_frame_one_short +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {B} | | + 5 | {A} | 5 | 6 + 6 | {B} | | +(6 rows) + +-- ============================================================ +-- Special Partition Cases +-- ============================================================ +-- Empty partition (0 rows) +WITH test_empty_partition AS ( + SELECT * FROM (VALUES + (1, 1, ARRAY['A']), + (2, 2, ARRAY['_']) -- Different partition + ) AS t(id, part, flags) +) +SELECT id, part, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_empty_partition +WHERE part = 99 -- No rows match +WINDOW w AS ( + PARTITION BY part + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE + A AS 'A' = ANY(flags) +); + id | part | flags | match_start | match_end +----+------+-------+-------------+----------- +(0 rows) + +-- Single row partition +WITH test_single_row AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_single_row +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 1 +(1 row) + +-- All 
rows fail matching (all DEFINE false) +WITH test_all_fail AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_all_fail +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS false -- All rows fail +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | | + 3 | {A} | | +(3 rows) + +-- Partition end with absorbable pattern +-- SKIP PAST LAST ROW + unbounded frame + all rows match A +-- Triggers absorb in !rowExists path at partition boundary. +WITH test_absorb_partition_end AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_absorb_partition_end +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {A} | | + 3 | {A} | | + 4 | {A} | | + 5 | {A} | | +(5 rows) + +-- ============================================================ +-- DEFINE Special Cases +-- ============================================================ +-- Undefined variable in DEFINE +WITH test_undefined_var AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['X']), -- B not defined, defaults to TRUE + (3, ARRAY['C']), + (4, ARRAY['A']), + (5, ARRAY['_']), -- B defaults to TRUE, but no flags + (6, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_undefined_var +WINDOW w AS ( + ORDER BY id + ROWS 
BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C) + DEFINE + A AS 'A' = ANY(flags), + -- B is undefined, defaults to TRUE + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {X} | | + 3 | {C} | | + 4 | {A} | 4 | 6 + 5 | {_} | | + 6 | {C} | | +(6 rows) + +-- ============================================================ +-- Absorption Dynamic Flags +-- ============================================================ +-- Partial absorbable pattern ((A+) B) +WITH test_partial_absorbable AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_partial_absorbable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A+) B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {B} | | + 5 | {_} | | +(5 rows) + +-- Dynamic flag update ((A+) | B) +WITH test_dynamic_flags AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_dynamic_flags +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A+) | B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | 2 | 3 + 3 | {A} | 3 | 3 + 4 | {B} | 4 | 4 + 5 | {A} | 5 | 5 + 6 | {B} | 6 | 6 +(6 rows) + +-- 
Non-absorbable context during absorption +-- Pattern (A B)+ C: A,B in absorbable group, C is not. +-- When END exits to C via nfa_state_create, isAbsorbable becomes false. +WITH test_non_absorbable AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['C']), + (6, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_non_absorbable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {B} | | + 3 | {A} | | + 4 | {B} | | + 5 | {C} | | + 6 | {_} | | +(6 rows) + +-- Absorption flags early return (!hasAbsorbableState) +-- Pattern (A B)+ C D with SKIP PAST LAST ROW +-- After reaching C (non-absorbable), hasAbsorbableState becomes false. +-- On next row (D), the early return fires. +WITH test_absorption_early_return AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['C']), + (6, ARRAY['D']), + (7, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_absorption_early_return +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 6 + 2 | {B} | | + 3 | {A} | | + 4 | {B} | | + 5 | {C} | | + 6 | {D} | | + 7 | {_} | | +(7 rows) + +-- Coverage failure: older can't cover newer's states +-- Pattern A+ | B+ with SKIP PAST LAST ROW. 
+-- Row 1: only A -> Ctx1 takes A branch only (B fails). +-- Row 2: A and B -> Ctx2 takes both branches. +-- Absorption: Ctx1 has A but no B -> can't cover Ctx2's B state -> fails. +WITH test_coverage_fail AS ( + SELECT * FROM (VALUES + (1, ARRAY['A', '_']), + (2, ARRAY['A', 'B']), + (3, ARRAY['A', '_']), + (4, ARRAY['A', '_']), + (5, ARRAY['_', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_coverage_fail +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ | B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,_} | 1 | 4 + 2 | {A,B} | | + 3 | {A,_} | | + 4 | {A,_} | | + 5 | {_,_} | | +(5 rows) + +-- Absorb skips completed context (older->states==NULL) +-- Pattern A+ | B+ with SKIP PAST LAST ROW. +-- Row 1: A only -> Ctx1 takes A branch. Row 2: B only -> Ctx1 A fails (completed). +-- Ctx2 takes B branch. Absorption: Ctx1 states==NULL -> skip. +WITH test_older_completed AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_older_completed +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ | B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 1 + 2 | {B} | 2 | 3 + 3 | {B} | | + 4 | {_} | | +(4 rows) + +-- Absorb skips non-absorbable context (!hasAbsorbableState) +-- Pattern A+ | B C with SKIP PAST LAST ROW (only A+ branch absorbable). +-- Row 1: B only -> Ctx1 takes B branch (non-absorbable), advances to C. 
+-- Row 2: C,A -> Ctx1 C matches (hasAbsorbableState=false). Ctx2 takes A (absorbable). +-- Absorption: Ctx1 !hasAbsorbableState -> skip. +WITH test_older_non_absorbable AS ( + SELECT * FROM (VALUES + (1, ARRAY['B', '_']), + (2, ARRAY['C', 'A']), + (3, ARRAY['_', 'A']), + (4, ARRAY['_', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_older_non_absorbable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ | B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B,_} | 1 | 2 + 2 | {C,A} | | + 3 | {_,A} | 3 | 3 + 4 | {_,_} | | +(4 rows) + +-- Reluctant branch in ALT not absorbable: (A+?) | B +-- A+? is reluctant so not absorbable. Compare with greedy (A+) | B above. +WITH test_reluctant_alt_absorption AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_alt_absorption +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A+?) 
| B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 1 + 2 | {A} | 2 | 2 + 3 | {A} | 3 | 3 + 4 | {B} | 4 | 4 + 5 | {_} | | +(5 rows) + +-- ============================================================ +-- Zero-Consumption Cycle Detection +-- ============================================================ +-- Cycle prevention at count > 0: (A*)* inner skip cycles at count=3 +WITH test_cycle_nonzero AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']) -- Inner A* matches 0, cycles at count=3 + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_cycle_nonzero +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A*)*) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | 2 | 3 + 3 | {A} | 3 | 3 + 4 | {B} | | +(4 rows) + +-- Cycle with mixed nullables: (A* B*)* multiple nullable paths +WITH test_cycle_mixed AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_cycle_mixed +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A* B*)*) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {B} | 2 | 3 + 3 | {A} | 3 | 3 + 4 | {C} | | +(4 rows) + +-- ============================================================ +-- Standard Clause 7: Formal Pattern Matching Rules +-- ISO/IEC 19075-5:2021, Clause 7 +-- 
============================================================ +-- ------------------------------------------------------------ +-- 7.2.2 Alternation: first alternative is preferred +-- ------------------------------------------------------------ +-- (A | B): A preferred over B when both could match +-- Row 1 has both A and B flags: A should be chosen (first alternative) +WITH test_alt_prefer AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), + (2, ARRAY['B']), + (3, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_prefer +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A | B)) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 1 + 2 | {B} | 2 | 2 + 3 | {A} | 3 | 3 +(3 rows) + +-- (A{1,2} | B{2,3}): all A-matches before all B-matches +-- Standard example: preferment order is AA, A, BBB, BB +-- Rows 1-2 have both A and B: greedy A{1,2} should match 1-2 +WITH test_alt_quantified AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), + (2, ARRAY['A','B']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_quantified +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A{1,2} | B{2,3})) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 2 + 2 | {A,B} | 2 | 2 + 3 | {B} | 3 | 5 + 4 | {B} | 4 | 5 + 5 | {B} | | +(5 rows) + +-- ------------------------------------------------------------ +-- 7.2.3 Concatenation: lexicographic ordering +-- 
------------------------------------------------------------ +-- ((A | B) (C | D)): preferment order is AC, AD, BC, BD +-- Row 1 matches A and B, Row 2 matches C and D +-- Preferred match: A then C (first alternatives in both positions) +WITH test_concat_lex AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), + (2, ARRAY['C','D']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_concat_lex +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (C | D)) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 2 + 2 | {C,D} | | +(2 rows) + +-- ((A | B) C): first alt (A) fails, second alt (B) succeeds +-- Tests backtracking: row 1 has only B, row 2 has C +WITH test_concat_backtrack AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['C']), + (3, ARRAY['A']), + (4, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_concat_backtrack +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A | B) C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B} | 1 | 2 + 2 | {C} | | + 3 | {A} | 3 | 4 + 4 | {C} | | +(4 rows) + +-- ------------------------------------------------------------ +-- 7.2.4 Quantification: greedy/reluctant, lexicographic > length +-- ------------------------------------------------------------ +-- V{2,4} greedy: longer match preferred +WITH test_quant_greedy AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS 
t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_quant_greedy +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,4}) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | | + 3 | {A} | | + 4 | {B} | | +(4 rows) + +-- V{2,4}? reluctant: shorter match preferred +WITH test_quant_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_quant_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,4}?) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {A} | | + 3 | {A} | | + 4 | {B} | | +(4 rows) + +-- ((A|B){1,2}) greedy: lexicographic > length +-- Standard example: preferment AA, AB, A, BA, BB, B +-- Single A preferred over B-starting longer match +WITH test_quant_lex_greedy AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), + (2, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_quant_lex_greedy +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A | B){1,2})) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 2 + 2 | {B} | | +(2 rows) + +-- ((A|B){1,2}?) 
reluctant: lexicographic > length +-- Standard example: preferment A, AA, AB, B, BA, BB +-- Single A preferred over any B-starting match +WITH test_quant_lex_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), + (2, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_quant_lex_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A | B){1,2}?)) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 1 + 2 | {B} | 2 | 2 +(2 rows) + +-- ------------------------------------------------------------ +-- 7.2.6 Anchors (not yet implemented - syntax error expected) +-- ------------------------------------------------------------ +-- ^ anchor: not yet supported +SELECT count(*) OVER w FROM (SELECT 1 AS v) t +WINDOW w AS (ORDER BY v ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (^ A) DEFINE A AS TRUE); +ERROR: syntax error at or near "^" +LINE 3: PATTERN (^ A) DEFINE A AS TRUE); + ^ +-- $ anchor: not yet supported +SELECT count(*) OVER w FROM (SELECT 1 AS v) t +WINDOW w AS (ORDER BY v ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A $) DEFINE A AS TRUE); +ERROR: syntax error at or near "$" +LINE 3: PATTERN (A $) DEFINE A AS TRUE); + ^ +-- ------------------------------------------------------------ +-- 7.2.8 Infinite repetitions of empty matches +-- (Perl lower-bound stopping rule) +-- ------------------------------------------------------------ +-- Standard examples from 7.2.8: +-- (A?){0,3}: allowed strings include STR00=(), STR01=(A), STR02=(empty), +-- STR03=(AA), STR04=(A,empty), STR07=(AAA), STR08=(AA,empty) +-- (A?){1,3}: same as {0,3} but STR00 excluded (min=1 not met) +-- (A?){2,3}: STR03-06 (len 2) and STR07,08,11,12 (len 3) are valid +-- STR06=(STRE,STRE) IS 
valid because non-final STRE at +-- position 1 fills the lower bound +-- (A??)*B: Standard 7.2.8 introductory example +-- "matched against a sequence of rows for which the only feasible +-- matching is: B" +-- A?? is reluctant, prefers empty. * is greedy but Perl rule stops +-- after empty match with min(=0) satisfied. +-- Expected: each B row matches alone (A?? empty, * stops, B matches) +WITH test_empty_reluctant_star AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['B']), + (3, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_empty_reluctant_star +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A??)* B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B} | 1 | 1 + 2 | {B} | 2 | 2 + 3 | {C} | | +(3 rows) + +-- (A?){0,3}: min=0, nullable inner. +-- A never matches. A? matches empty, min=0 satisfied immediately. +-- Per standard: empty match expected for every row. +-- XXX: visited bitmap blocks empty iteration -> no match (same as {2,3}) +WITH test_728_min0 AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['B']), + (3, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_728_min0 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A?){0,3}) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B} | | + 2 | {B} | | + 3 | {B} | | +(3 rows) + +-- (A?){1,3}: min=1, nullable inner. +-- A never matches. Need 1 empty iteration to satisfy min=1. +-- Per standard: empty match expected for every row. 
+-- XXX: visited bitmap blocks empty iteration -> no match (same as {2,3}) +WITH test_728_min1 AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['B']), + (3, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_728_min1 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A?){1,3}) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B} | | + 2 | {B} | | + 3 | {B} | | +(3 rows) + +-- (A?){2,3}: min=2, nullable inner. +-- A never matches. Need 2 empty iterations to satisfy min=2. +-- Per standard: STR06=(STRE STRE) is valid for min=2. +-- Expected: empty match for every row +-- XXX: visited bitmap blocks second empty iteration -> match failure +WITH test_728_min2 AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['B']), + (3, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_728_min2 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A?){2,3}) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B} | | + 2 | {B} | | + 3 | {B} | | +(3 rows) + +-- (A?){2,3} mixed: some rows match A, some don't +-- Rows 1-2: A matches, greedy takes 2 -> min satisfied +-- Row 3: A doesn't match, needs 2 empty iterations for min=2 +-- XXX: Row 3 fails due to visited bitmap (same as pure empty {2,3}) +-- Row 4: A matches 1 real iter + 1 ff empty exit -> match 4-4 +WITH test_728_min2_mixed AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM 
test_728_min2_mixed +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A?){2,3}) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {A} | 2 | 2 + 3 | {B} | | + 4 | {A} | 4 | 4 +(4 rows) + +-- (A? B?){2,3}: multi-element nullable body with real matches +-- Body A? B? is nullable (both optional), but A and B DO match rows. +-- Real (non-empty) iterations loop back normally; fast-forward only +-- fires as a parallel exit path (EXIT ONLY, no greedy/reluctant loop). +-- Data: alternating A, B rows (6 rows) +-- Greedy: each row gets the longest match from its starting position. +-- Row 1: 3 iters (A@1,B@2)(A@3,B@4)(A@5,B@6) -> 1-6 +-- Row 5: 1 real iter + 1 ff empty exit -> 5-6 +WITH test_728_multi_body AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_728_multi_body +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A? B?){2,3}) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 6 + 2 | {B} | 2 | 6 + 3 | {A} | 3 | 6 + 4 | {B} | 4 | 6 + 5 | {A} | 5 | 6 + 6 | {B} | 6 | 6 +(6 rows) + +-- (A? 
B?){2,3}: pure empty body (nothing matches) +-- XXX: All NULL: same issue as test_728_min2 (empty match at context +-- start yields UNMATCHED via startPos-1 initial advance) +WITH test_728_multi_empty AS ( + SELECT * FROM (VALUES + (1, ARRAY['C']), + (2, ARRAY['C']), + (3, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_728_multi_empty +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A? B?){2,3}) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {C} | | + 2 | {C} | | + 3 | {C} | | +(3 rows) + +-- (A? B?){2,3}: mixed real and empty iterations +-- Row 1: iter1 real (A@1,B@2), iter2 at row 3 empty -> ff exit, match 1-2 +-- Row 3: C doesn't match A or B -> NULL +-- Row 4: iter1 real (A@4,B@5), iter2 at end empty -> ff exit, match 4-5 +WITH test_728_multi_mixed AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['A']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_728_multi_mixed +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A? B?){2,3}) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {B} | 2 | 2 + 3 | {C} | | + 4 | {A} | 4 | 5 + 5 | {B} | 5 | 5 +(5 rows) + +-- ------------------------------------------------------------ +-- 7.3 Pattern matching in theory and practice +-- ------------------------------------------------------------ +-- Standard's worked example: A? 
B+ with specific data +-- Preferment order: (A)(BBB), (A)(BB), (A)(B), ()(BBB), ()(BB), ()(B) +-- Row 1: A condition (price>100) is false -> A fails +-- Backtrack: empty A?, then B+ from row 1 +-- Expected: rows 1-3 match as B (A? takes empty match) +WITH test_73_example AS ( + SELECT * FROM (VALUES + (1, 60), + (2, 70), + (3, 40) + ) AS t(id, price) +) +SELECT id, price, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_73_example +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A? B+) + DEFINE + A AS price > 100, + B AS TRUE +); + id | price | match_start | match_end +----+-------+-------------+----------- + 1 | 60 | 1 | 3 + 2 | 70 | | + 3 | 40 | | +(3 rows) + -- 2.43.0