From 9a401a09b56b18bd08ca576ba44b4017be231fc2 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii Date: Sat, 2 May 2026 13:40:29 +0900 Subject: [PATCH v47 5/9] Row pattern recognition patch (executor and commands). --- src/backend/commands/explain.c | 522 ++++++++ src/backend/executor/Makefile | 1 + src/backend/executor/README.rpr | 1578 ++++++++++++++++++++++ src/backend/executor/execExpr.c | 92 ++ src/backend/executor/execExprInterp.c | 267 ++++ src/backend/executor/execRPR.c | 1772 +++++++++++++++++++++++++ src/backend/executor/meson.build | 1 + src/backend/executor/nodeWindowAgg.c | 1066 ++++++++++++++- src/backend/jit/llvm/llvmjit_expr.c | 78 +- src/backend/jit/llvm/llvmjit_types.c | 2 + src/backend/utils/adt/windowfuncs.c | 119 +- src/include/catalog/pg_proc.dat | 24 + src/include/executor/execExpr.h | 20 + src/include/executor/execRPR.h | 40 + src/include/executor/nodeWindowAgg.h | 3 + src/include/nodes/execnodes.h | 130 ++ 16 files changed, 5703 insertions(+), 12 deletions(-) create mode 100644 src/backend/executor/README.rpr create mode 100644 src/backend/executor/execRPR.c create mode 100644 src/include/executor/execRPR.h diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 112c17b0d64..99de36b57f2 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -30,6 +30,7 @@ #include "nodes/extensible.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "optimizer/rpr.h" #include "parser/analyze.h" #include "parser/parsetree.h" #include "rewrite/rewriteHandler.h" @@ -119,6 +120,20 @@ static void show_window_def(WindowAggState *planstate, static void show_window_keys(StringInfo buf, PlanState *planstate, int nkeys, AttrNumber *keycols, List *ancestors, ExplainState *es); +static void append_rpr_quantifier(StringInfo buf, RPRPatternElement *elem); +static char *deparse_rpr_pattern(RPRPattern *pattern); +static void deparse_rpr_elements(RPRPattern *pattern, int *idx, + StringInfoData *buf, RPRDepth groupDepth, + RPRDepth *prevDepth, bool *needSpace); +static void deparse_rpr_group(RPRPattern *pattern, int *idx, + StringInfoData *buf, RPRDepth *prevDepth, + bool *needSpace); +static void deparse_rpr_alt(RPRPattern *pattern, int *idx, + StringInfoData *buf, RPRDepth *prevDepth, + bool *needSpace, List **altSeps); +static void deparse_rpr_var(RPRPattern *pattern, int *idx, + StringInfoData *buf, RPRDepth *prevDepth, + bool *needSpace, List **altSeps); static void show_storage_info(char *maxStorageType, int64 maxSpaceUsed, ExplainState *es); static void show_tablesample(TableSampleClause *tsc, PlanState *planstate, @@ -129,6 +144,7 @@ static void show_incremental_sort_info(IncrementalSortState *incrsortstate, static void show_hash_info(HashState *hashstate, ExplainState *es); static void show_material_info(MaterialState *mstate, ExplainState *es); static void show_windowagg_info(WindowAggState *winstate, ExplainState *es); +static void show_rpr_nfa_stats(WindowAggState *winstate, ExplainState *es); static void show_ctescan_info(CteScanState *ctescanstate, ExplainState *es); static void show_table_func_scan_info(TableFuncScanState *tscanstate, ExplainState *es); @@ -2898,6 +2914,284 @@ show_sortorder_options(StringInfo buf, Node *sortexpr, } } +/* + * Append quantifier suffix for a pattern element. + */ +static void +append_rpr_quantifier(StringInfo buf, RPRPatternElement *elem) +{ + /* Append quantifier if not {1,1} */ + if (elem->min == 0 && elem->max == RPR_QUANTITY_INF) + appendStringInfoChar(buf, '*'); + else if (elem->min == 1 && elem->max == RPR_QUANTITY_INF) + appendStringInfoChar(buf, '+'); + else if (elem->min == 0 && elem->max == 1) + appendStringInfoChar(buf, '?'); + else if (elem->max == RPR_QUANTITY_INF) + appendStringInfo(buf, "{%d,}", elem->min); + else if (elem->min == elem->max && elem->min != 1) + appendStringInfo(buf, "{%d}", elem->min); + else if (elem->min != 1 || elem->max != 1) + appendStringInfo(buf, "{%d,%d}", elem->min, elem->max); + + if (RPRElemIsReluctant(elem)) + { + if (elem->min == 1 && elem->max == 1) + appendStringInfo(buf, "{1}"); /* make reluctant ? unambiguous */ + appendStringInfoChar(buf, '?'); + } + + /* Append absorption markers: " for judgment point, ' for branch only */ + if (RPRElemIsAbsorbable(elem)) + { + Assert(elem->max == RPR_QUANTITY_INF); + appendStringInfoChar(buf, '"'); + } + else if (RPRElemIsAbsorbableBranch(elem)) + appendStringInfoChar(buf, '\''); +} + +/* + * Deparse a compiled RPRPattern (bytecode) back to pattern string. + * + * Walks the flat bytecode array using mutual recursion: deparse_rpr_elements + * processes sequential elements, and deparse_rpr_group handles BEGIN...END + * groups by recursing back into deparse_rpr_elements for the group content. + */ +static char * +deparse_rpr_pattern(RPRPattern *pattern) +{ + StringInfoData buf; + int idx = 0; + RPRDepth prevDepth = 0; + bool needSpace = false; + + Assert(pattern != NULL && pattern->numElements >= 2); + + initStringInfo(&buf); + + deparse_rpr_elements(pattern, &idx, &buf, RPR_DEPTH_NONE, + &prevDepth, &needSpace); + + /* Close remaining open parens */ + while (prevDepth > 0) + { + appendStringInfoChar(&buf, ')'); + prevDepth--; + } + + return buf.data; +} + +/* + * Process pattern elements sequentially until FIN or END at groupDepth. + * + * When groupDepth >= 0, stops at the matching END element (leaving idx + * pointing to it) so the caller (deparse_rpr_group) can consume it. + * When groupDepth < 0, processes until FIN (top-level call). + */ +static void +deparse_rpr_elements(RPRPattern *pattern, int *idx, StringInfoData *buf, + RPRDepth groupDepth, RPRDepth *prevDepth, + bool *needSpace) +{ + List *altSeps = NIL; /* pending alternation separator indices */ + + while (*idx < pattern->numElements) + { + RPRPatternElement *elem = &pattern->elements[*idx]; + + if (RPRElemIsFin(elem)) + break; + + /* Stop at END matching our group depth; caller handles it */ + if (RPRElemIsEnd(elem) && elem->depth == groupDepth) + break; + + /* Alternation separator */ + if (list_member_int(altSeps, *idx)) + { + /* Close parens to match separator depth first */ + while (*prevDepth > elem->depth) + { + appendStringInfoChar(buf, ')'); + (*prevDepth)--; + } + appendStringInfoString(buf, " | "); + *needSpace = false; + altSeps = list_delete_int(altSeps, *idx); + } + + /* Dispatch to element-type handlers */ + if (RPRElemIsAlt(elem)) + deparse_rpr_alt(pattern, idx, buf, prevDepth, + needSpace, &altSeps); + else if (RPRElemIsBegin(elem)) + deparse_rpr_group(pattern, idx, buf, prevDepth, + needSpace); + else if (RPRElemIsVar(elem)) + deparse_rpr_var(pattern, idx, buf, prevDepth, + needSpace, &altSeps); + } + list_free(altSeps); +} + +/* + * Process a BEGIN...END group. + * + * Consumes BEGIN, recurses into deparse_rpr_elements for group content, + * then consumes END and outputs the group quantifier. + * + * When the group wraps a single ALT with no siblings, the group-level + * parenthesis is suppressed since the ALT-to-children depth transition + * already provides it (avoids double parens like "((a | b))+"). + */ +static void +deparse_rpr_group(RPRPattern *pattern, int *idx, StringInfoData *buf, + RPRDepth *prevDepth, bool *needSpace) +{ + RPRPatternElement *begin = &pattern->elements[*idx]; + RPRDepth childDepth = begin->depth + 1; + bool singleAlt = false; + RPRPatternElement *end; + + /* + * Check if this group wraps a single ALT with no siblings. Scan from + * after ALT to END: if no element at childDepth exists, the ALT is the + * sole child. + */ + if (*idx + 1 < pattern->numElements && + RPRElemIsAlt(&pattern->elements[*idx + 1])) + { + int j; + + singleAlt = true; + for (j = *idx + 2; j < pattern->numElements; j++) + { + RPRPatternElement *e = &pattern->elements[j]; + + if (RPRElemIsEnd(e) && e->depth == begin->depth) + break; + if (e->depth <= childDepth) + { + singleAlt = false; + break; + } + } + } + + /* Open group paren (unless single ALT provides it) */ + if (!singleAlt) + { + if (*needSpace) + appendStringInfoChar(buf, ' '); + appendStringInfoChar(buf, '('); + *needSpace = false; + } + *prevDepth = childDepth; + (*idx)++; /* consume BEGIN */ + + /* Process group children; stops at matching END */ + deparse_rpr_elements(pattern, idx, buf, begin->depth, + prevDepth, needSpace); + + /* Consume END and output quantifier */ + Assert(*idx < pattern->numElements); + end = &pattern->elements[*idx]; + Assert(RPRElemIsEnd(end) && end->depth == begin->depth); + + while (*prevDepth > end->depth + 1) + { + appendStringInfoChar(buf, ')'); + (*prevDepth)--; + } + if (!singleAlt) + appendStringInfoChar(buf, ')'); + append_rpr_quantifier(buf, end); + *prevDepth = end->depth; + *needSpace = true; + (*idx)++; /* consume END */ +} + +/* + * Process an ALT element: adjust depth parens and register separator positions. + */ +static void +deparse_rpr_alt(RPRPattern *pattern, int *idx, StringInfoData *buf, + RPRDepth *prevDepth, bool *needSpace, List **altSeps) +{ + RPRPatternElement *elem = &pattern->elements[*idx]; + + /* Close parens for depth decrease */ + while (*prevDepth > elem->depth) + { + appendStringInfoChar(buf, ')'); + (*prevDepth)--; + *needSpace = true; + } + + /* Open parens up to ALT's depth */ + while (*prevDepth < elem->depth) + { + if (*needSpace) + appendStringInfoChar(buf, ' '); + appendStringInfoChar(buf, '('); + (*prevDepth)++; + *needSpace = false; + } + + /* Register next alternation separator position */ + if (elem->next != RPR_ELEMIDX_INVALID) + { + RPRPatternElement *firstElem = &pattern->elements[elem->next]; + + if (firstElem->jump != RPR_ELEMIDX_INVALID) + *altSeps = lappend_int(*altSeps, firstElem->jump); + } + if (elem->jump != RPR_ELEMIDX_INVALID) + *altSeps = lappend_int(*altSeps, elem->jump); + (*idx)++; +} + +/* + * Process a VAR element: adjust depth parens and output variable name. + */ +static void +deparse_rpr_var(RPRPattern *pattern, int *idx, StringInfoData *buf, + RPRDepth *prevDepth, bool *needSpace, List **altSeps) +{ + RPRPatternElement *elem = &pattern->elements[*idx]; + + /* Open parens for depth increase */ + while (*prevDepth < elem->depth) + { + if (*needSpace) + appendStringInfoChar(buf, ' '); + appendStringInfoChar(buf, '('); + (*prevDepth)++; + *needSpace = false; + } + + /* Close parens for depth decrease */ + while (*prevDepth > elem->depth) + { + appendStringInfoChar(buf, ')'); + (*prevDepth)--; + } + + if (*needSpace) + appendStringInfoChar(buf, ' '); + + Assert(elem->varId < pattern->numVars); + appendStringInfoString(buf, quote_identifier(pattern->varNames[elem->varId])); + append_rpr_quantifier(buf, elem); + *needSpace = true; + + if (elem->jump != RPR_ELEMIDX_INVALID) + *altSeps = lappend_int(*altSeps, elem->jump); + (*idx)++; +} + /* * Show the window definition for a WindowAgg node. */ @@ -2956,6 +3250,79 @@ show_window_def(WindowAggState *planstate, List *ancestors, ExplainState *es) appendStringInfoChar(&wbuf, ')'); ExplainPropertyText("Window", wbuf.data, es); pfree(wbuf.data); + + /* Show Row Pattern Recognition pattern if present */ + if (wagg->rpPattern != NULL) + { + char *patternStr = deparse_rpr_pattern(wagg->rpPattern); + + if (patternStr != NULL) + { + ExplainPropertyText("Pattern", patternStr, es); + pfree(patternStr); + } + + /* + * Show navigation offsets for tuplestore trim. For EXPLAIN ANALYZE, + * use the executor-resolved values (which may differ from the plan + * when NEEDS_EVAL was resolved to FIXED or RETAIN_ALL at init). + */ + { + RPRNavOffsetKind maxKind = wagg->navMaxOffsetKind; + int64 maxOffset = wagg->navMaxOffset; + RPRNavOffsetKind firstKind = wagg->navFirstOffsetKind; + int64 firstOffset = wagg->navFirstOffset; + + if (es->analyze) + { + maxKind = planstate->navMaxOffsetKind; + maxOffset = planstate->navMaxOffset; + firstKind = planstate->navFirstOffsetKind; + firstOffset = planstate->navFirstOffset; + } + + switch (maxKind) + { + case RPR_NAV_OFFSET_NEEDS_EVAL: + ExplainPropertyText("Nav Mark Lookback", "runtime", es); + break; + case RPR_NAV_OFFSET_RETAIN_ALL: + ExplainPropertyText("Nav Mark Lookback", "retain all", es); + break; + case RPR_NAV_OFFSET_FIXED: + ExplainPropertyInteger("Nav Mark Lookback", NULL, + maxOffset, es); + break; + default: + elog(ERROR, "unrecognized RPR nav offset kind: %d", + maxKind); + break; + } + + if (wagg->hasFirstNav) + { + switch (firstKind) + { + case RPR_NAV_OFFSET_NEEDS_EVAL: + ExplainPropertyText("Nav Mark Lookahead", "runtime", + es); + break; + case RPR_NAV_OFFSET_RETAIN_ALL: + ExplainPropertyText("Nav Mark Lookahead", "retain all", + es); + break; + case RPR_NAV_OFFSET_FIXED: + ExplainPropertyInteger("Nav Mark Lookahead", NULL, + firstOffset, es); + break; + default: + elog(ERROR, "unrecognized RPR nav offset kind: %d", + firstKind); + break; + } + } + } + } } /* @@ -3508,6 +3875,7 @@ show_windowagg_info(WindowAggState *winstate, ExplainState *es) { char *maxStorageType; int64 maxSpaceUsed; + WindowAgg *wagg = (WindowAgg *) winstate->ss.ps.plan; Tuplestorestate *tupstore = winstate->buffer; @@ -3520,6 +3888,160 @@ show_windowagg_info(WindowAggState *winstate, ExplainState *es) tuplestore_get_stats(tupstore, &maxStorageType, &maxSpaceUsed); show_storage_info(maxStorageType, maxSpaceUsed, es); + + /* Show NFA statistics for Row Pattern Recognition */ + if (wagg->rpPattern != NULL) + show_rpr_nfa_stats(winstate, es); +} + +/* + * Show NFA statistics for Row Pattern Recognition on WindowAgg node. + */ +static void +show_rpr_nfa_stats(WindowAggState *winstate, ExplainState *es) +{ + if (es->format != EXPLAIN_FORMAT_TEXT) + { + /* State and context counters */ + ExplainPropertyInteger("NFA States Peak", NULL, winstate->nfaStatesMax, es); + ExplainPropertyInteger("NFA States Total", NULL, winstate->nfaStatesTotalCreated, es); + ExplainPropertyInteger("NFA States Merged", NULL, winstate->nfaStatesMerged, es); + ExplainPropertyInteger("NFA Contexts Peak", NULL, winstate->nfaContextsMax, es); + ExplainPropertyInteger("NFA Contexts Total", NULL, winstate->nfaContextsTotalCreated, es); + ExplainPropertyInteger("NFA Contexts Absorbed", NULL, winstate->nfaContextsAbsorbed, es); + ExplainPropertyInteger("NFA Contexts Skipped", NULL, winstate->nfaContextsSkipped, es); + ExplainPropertyInteger("NFA Contexts Pruned", NULL, winstate->nfaContextsPruned, es); + + /* Match/mismatch counts and length statistics */ + ExplainPropertyInteger("NFA Matched", NULL, winstate->nfaMatchesSucceeded, es); + ExplainPropertyInteger("NFA Mismatched", NULL, winstate->nfaMatchesFailed, es); + if (winstate->nfaMatchesSucceeded > 0) + { + ExplainPropertyInteger("NFA Match Length Min", NULL, winstate->nfaMatchLen.min, es); + ExplainPropertyInteger("NFA Match Length Max", NULL, winstate->nfaMatchLen.max, es); + ExplainPropertyFloat("NFA Match Length Avg", NULL, + (double) winstate->nfaMatchLen.total / winstate->nfaMatchesSucceeded, 1, + es); + } + if (winstate->nfaMatchesFailed > 0) + { + ExplainPropertyInteger("NFA Mismatch Length Min", NULL, winstate->nfaFailLen.min, es); + ExplainPropertyInteger("NFA Mismatch Length Max", NULL, winstate->nfaFailLen.max, es); + ExplainPropertyFloat("NFA Mismatch Length Avg", NULL, + (double) winstate->nfaFailLen.total / winstate->nfaMatchesFailed, 1, + es); + } + + /* Absorbed/skipped context length statistics */ + if (winstate->nfaContextsAbsorbed > 0) + { + ExplainPropertyInteger("NFA Absorbed Length Min", NULL, winstate->nfaAbsorbedLen.min, es); + ExplainPropertyInteger("NFA Absorbed Length Max", NULL, winstate->nfaAbsorbedLen.max, es); + ExplainPropertyFloat("NFA Absorbed Length Avg", NULL, + (double) winstate->nfaAbsorbedLen.total / winstate->nfaContextsAbsorbed, 1, + es); + } + if (winstate->nfaContextsSkipped > 0) + { + ExplainPropertyInteger("NFA Skipped Length Min", NULL, winstate->nfaSkippedLen.min, es); + ExplainPropertyInteger("NFA Skipped Length Max", NULL, winstate->nfaSkippedLen.max, es); + ExplainPropertyFloat("NFA Skipped Length Avg", NULL, + (double) winstate->nfaSkippedLen.total / winstate->nfaContextsSkipped, 1, + es); + } + } + else + { + /* State and context counters */ + ExplainIndentText(es); + appendStringInfo(es->str, + "NFA States: " INT64_FORMAT " peak, " INT64_FORMAT " total, " INT64_FORMAT " merged\n", + winstate->nfaStatesMax, + winstate->nfaStatesTotalCreated, + winstate->nfaStatesMerged); + ExplainIndentText(es); + appendStringInfo(es->str, + "NFA Contexts: " INT64_FORMAT " peak, " INT64_FORMAT " total, " INT64_FORMAT " pruned\n", + winstate->nfaContextsMax, + winstate->nfaContextsTotalCreated, + winstate->nfaContextsPruned); + + /* Match/mismatch counts with length min/max/avg */ + ExplainIndentText(es); + appendStringInfo(es->str, "NFA: "); + if (winstate->nfaMatchesSucceeded > 0) + { + double avgLen = (double) winstate->nfaMatchLen.total / winstate->nfaMatchesSucceeded; + + appendStringInfo(es->str, + INT64_FORMAT " matched (len " INT64_FORMAT "/" INT64_FORMAT "/%.1f)", + winstate->nfaMatchesSucceeded, + winstate->nfaMatchLen.min, + winstate->nfaMatchLen.max, + avgLen); + } + else + { + appendStringInfo(es->str, "0 matched"); + } + if (winstate->nfaMatchesFailed > 0) + { + double avgFail = (double) winstate->nfaFailLen.total / winstate->nfaMatchesFailed; + + appendStringInfo(es->str, + ", " INT64_FORMAT " mismatched (len " INT64_FORMAT "/" INT64_FORMAT "/%.1f)", + winstate->nfaMatchesFailed, + winstate->nfaFailLen.min, + winstate->nfaFailLen.max, + avgFail); + } + else + { + appendStringInfo(es->str, ", 0 mismatched"); + } + appendStringInfoChar(es->str, '\n'); + + /* Absorbed/skipped context length statistics */ + if (winstate->nfaContextsAbsorbed > 0 || winstate->nfaContextsSkipped > 0) + { + ExplainIndentText(es); + appendStringInfo(es->str, "NFA: "); + + if (winstate->nfaContextsAbsorbed > 0) + { + double avgAbsorbed = (double) winstate->nfaAbsorbedLen.total / winstate->nfaContextsAbsorbed; + + appendStringInfo(es->str, + INT64_FORMAT " absorbed (len " INT64_FORMAT "/" INT64_FORMAT "/%.1f)", + winstate->nfaContextsAbsorbed, + winstate->nfaAbsorbedLen.min, + winstate->nfaAbsorbedLen.max, + avgAbsorbed); + } + else + { + appendStringInfo(es->str, "0 absorbed"); + } + + if (winstate->nfaContextsSkipped > 0) + { + double avgSkipped = (double) winstate->nfaSkippedLen.total / winstate->nfaContextsSkipped; + + appendStringInfo(es->str, + ", " INT64_FORMAT " skipped (len " INT64_FORMAT "/" INT64_FORMAT "/%.1f)", + winstate->nfaContextsSkipped, + winstate->nfaSkippedLen.min, + winstate->nfaSkippedLen.max, + avgSkipped); + } + else + { + appendStringInfo(es->str, ", 0 skipped"); + } + + appendStringInfoChar(es->str, '\n'); + } + } } /* diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile index 11118d0ce02..2b257427795 100644 --- a/src/backend/executor/Makefile +++ b/src/backend/executor/Makefile @@ -25,6 +25,7 @@ OBJS = \ execParallel.o \ execPartition.o \ execProcnode.o \ + execRPR.o \ execReplication.o \ execSRF.o \ execScan.o \ diff --git a/src/backend/executor/README.rpr b/src/backend/executor/README.rpr new file mode 100644 index 00000000000..52bcd77390c --- /dev/null +++ b/src/backend/executor/README.rpr @@ -0,0 +1,1578 @@ +============================================================================ + PostgreSQL Row Pattern Recognition: Flat-Array Stream NFA Guide +============================================================================ + + Target audience: Developers with a basic understanding of the PostgreSQL + executor and planner architecture + + Scope: The entire process from PATTERN/DEFINE clause parsing to NFA + runtime execution + + Related code: + - src/backend/parser/parse_rpr.c (parser phase) + - src/backend/optimizer/plan/rpr.c (optimizer phase) + - src/backend/executor/nodeWindowAgg.c (executor phase, window agg) + - src/backend/executor/execRPR.c (executor phase, NFA engine) + - src/include/executor/execRPR.h (NFA public API) + - src/include/nodes/plannodes.h (plan node definitions) + - src/include/nodes/execnodes.h (execution state definitions) + - src/include/optimizer/rpr.h (types and constants) + - src/backend/optimizer/plan/createplan.c (nav offset computation) + +============================================================================ + +What is a Flat-Array Stream NFA? + + The NFA in this implementation is not a traditional state-transition graph + but a flat array of fixed-size 16-byte elements. At runtime, it processes + the row stream in a forward-only manner, expanding epsilon transitions + eagerly without backtracking. + + - Flat-Array: Pattern compiled into a flat array, + not a graph (Chapter IV) + - Stream: Rows consumed sequentially in one direction, + never revisited (Chapter XII) + - NFA: Nondeterministic execution where multiple states + coexist within a single context (Chapter VI) + +Chapter I Row Pattern Recognition Overview +============================================================================ + +Row Pattern Recognition (hereafter RPR) is a feature introduced in SQL:2016 +that matches regex-based patterns against ordered row sets. + +The SQL standard defines two forms: + + Feature R010: MATCH_RECOGNIZE (FROM clause) + - Dedicated table operator + - Provides dedicated functions such as MATCH_NUMBER(), CLASSIFIER() + - Supports ONE ROW PER MATCH / ALL ROWS PER MATCH + + Feature R020: RPR in a window (WINDOW clause) + - Integrated into the existing window function framework + - Supports ALL ROWS PER MATCH only + - No MATCH_NUMBER() + +This implementation targets Feature R020. + +The basic syntax is as follows: + + SELECT ... + OVER ( + PARTITION BY ... + ORDER BY ... + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + [INITIAL | SEEK] -- SEEK is defined in the standard but not implemented + AFTER MATCH SKIP TO NEXT ROW | SKIP PAST LAST ROW + PATTERN ( ) + DEFINE AS , ... + ) + +The PATTERN clause is a regular expression over row pattern variables. +The DEFINE clause specifies boolean conditions that determine whether each +variable evaluates to true for the current row. + +Example: + + PATTERN (A+ B) + DEFINE A AS price > PREV(price), + B AS price < PREV(price) + +This pattern matches "a span where prices rise consecutively then drop." + +Chapter II Overall Processing Pipeline +============================================================================ + +RPR processing is divided into three phases: + + +------------------------------------------------------------+ + | 1. Parsing (Parser) | + | SQL text -> PATTERN AST + DEFINE expression tree | + | | + | 2. Compilation (Optimizer/Planner) | + | PATTERN AST -> optimization -> flat NFA element array | + | | + | 3. Execution (Executor) | + | Row-by-row matching via NFA simulation | + +------------------------------------------------------------+ + +Each phase uses independent data structures, and the interfaces between +phases are well-defined: + + Parser -> Planner: WindowClause.rpPattern (RPRPatternNode tree) + WindowClause.defineClause (TargetEntry list) + + Planner -> Executor: WindowAgg.rpPattern (RPRPattern struct) + WindowAgg.defineClause (TargetEntry list) + +Chapter III Parsing Phase +============================================================================ + +III-1. Entry Point + + transformWindowDefinitions() (parse_clause.c) + +-- transformRPR() (parse_rpr.c) + +transformRPR() is invoked when RPCommonSyntax is present and performs the +following: + + (1) Frame option validation + - Only ROWS is allowed (RANGE, GROUPS are not) + - The start boundary must be CURRENT ROW + - EXCLUDE option is not allowed + + (2) Transcription to WindowClause + - Copies rpPattern, rpSkipTo, initial fields + + (3) DEFINE clause transformation (transformDefineClause) + +III-2. PATTERN AST + +The parser transforms the PATTERN clause into an RPRPatternNode tree. +Each node has one of the following four types: + + RPR_PATTERN_VAR Variable reference. Name stored in varName field. + RPR_PATTERN_SEQ Concatenation. Children node list in children. + RPR_PATTERN_ALT Alternation. Branch node list in children. + RPR_PATTERN_GROUP Group (parentheses). Body node list in children. + +All nodes have min/max fields to express quantifiers: + + A -> VAR(A, min=1, max=1) + A+ -> VAR(A, min=1, max=INF) + A* -> VAR(A, min=0, max=INF) + A? -> VAR(A, min=0, max=1) + A{3,5} -> VAR(A, min=3, max=5) + +If the reluctant field is true, the quantifier is reluctant (non-greedy). +(RPRPatternNode.reluctant is bool; reluctant_location is the separate +ParseLoc field holding the '?' token position, or -1 if absent.) + +Example: PATTERN ((A+ B) | C*) + + ALT + +-- SEQ + | +-- VAR(A, 1, INF) + | +-- VAR(B, 1, 1) + +-- VAR(C, 0, INF) + +III-3. DEFINE Clause Transformation + +transformDefineClause() processes each DEFINE variable as follows: + + (1) Checks for duplicate variable names + (2) Transforms the expression via transformExpr() + (3) Extracts Var nodes via pull_var_clause() and ensures each is + present in the query targetlist, so the planner propagates the + referenced columns through the plan tree + (4) Wraps in a TargetEntry with the variable name set in resname + +After all variables are processed: + (5) Coerces each expression to Boolean type (coerce_to_boolean) + +Variables that are used in PATTERN but not defined in DEFINE are implicitly +evaluated as TRUE (matching all rows). + +Chapter IV Compilation Phase +============================================================================ + +IV-1. Entry Point + + create_windowagg_plan() (createplan.c) + +-- buildDefineVariableList() Build variable name list from DEFINE + +-- buildRPRPattern() NFA compilation (6 phases) + +IV-2. The 6 Phases of buildRPRPattern() + + Phase 1: AST optimization (optimizeRPRPattern) + Phase 2: Statistics collection (scanRPRPattern) + Phase 3: Memory allocation (allocateRPRPattern) + Phase 4: NFA element fill (fillRPRPattern) + Phase 5: Finalization (finalizeRPRPattern) + Phase 6: Absorbability analysis (computeAbsorbability) + +IV-3. Phase 1: AST Optimization + +After copying the parser-generated AST, the following optimizations are +applied: + + (a) SEQ flattening: Unwrap nested SEQ nodes + SEQ(A, SEQ(B, C)) -> SEQ(A, B, C) + + (b) Consecutive variable merging: Merge consecutive occurrences of the + same variable into a single quantifier + A A -> A{2} + A{2,3} A{1,2} -> A{3,5} + + (c) Consecutive group merging: Merge repeated identical groups + (A B)+ (A B)+ -> (A B){2,INF} + + (d) Consecutive ALT merging: Merge repeated identical ALT nodes + (A | B) (A | B) (A | B) -> (A | B){3} + + (e) Prefix/suffix absorption: Absorb identical sequences before/after + a group + A B (A B)+ -> (A B){2,INF} + + (f) ALT flattening and deduplication + (A | (B | C)) -> (A | B | C) + (A | B | A) -> (A | B) + + (g) Quantifier multiplication: Collapse nested quantifiers when safe + (A+)+ -> A+ + (A{2,3}){5} -> A{10,15} + + (h) Single-child unwrap + SEQ(A) -> A, (A){1,1} -> A + +IV-4. Phase 4: NFA Element Array Generation + +Transforms the optimized AST into a flat array of RPRPatternElement. +This is the core data structure used for NFA simulation at runtime. + +RPRPatternElement struct (16 bytes): + + Field Size Description + --------------------------------------------------------- + varId 1B Variable ID (0-251) or control code (252-255) + depth 1B Group nesting depth + flags 1B Bit flags (see below) + reserved 1B Padding + min 4B Quantifier lower bound + max 4B Quantifier upper bound + next 2B Next element index (sequential flow) + jump 2B Branch target index (for ALT/GROUP) + +Control codes: + + RPR_VARID_BEGIN (252) Group start marker + RPR_VARID_END (253) Group end marker + RPR_VARID_ALT (254) Alternation start marker + RPR_VARID_FIN (255) Pattern completion marker + +Element flags (1 byte, bitmask): + + 0x01 RPR_ELEM_RELUCTANT (VAR, BEGIN, END) + Non-greedy quantifier. Prefers shorter match: try exit-loop + first, then repeat. Set on VAR for simple (A+?), + on BEGIN+END for group ((...)+?). + + 0x02 RPR_ELEM_EMPTY_LOOP (END) + Group body can produce empty match (all children nullable). + Creates a fast-forward exit clone alongside the normal + loop-back so cycle detection doesn't kill legitimate + matches. (IV-4b) + + 0x04 RPR_ELEM_ABSORBABLE_BRANCH (VAR, BEGIN, END, ALT) + Element lies within an absorbable region. Used at runtime + to track whether the current NFA state is in an absorbable + context. + + 0x08 RPR_ELEM_ABSORBABLE (VAR, END) + Absorption judgment point. Where to compare consecutive + iterations for absorption. + - Simple unbounded VAR (A+): set on the VAR itself + - Unbounded GROUP ((A B)+): set on the END element only + + Accessor macros: + RPRElemIsReluctant(e) (e)->flags & 0x01 + RPRElemCanEmptyLoop(e) (e)->flags & 0x02 + RPRElemIsAbsorbableBranch(e) (e)->flags & 0x04 + RPRElemIsAbsorbable(e) (e)->flags & 0x08 + +Example: PATTERN (A+ B | C) + + AST: ALT(SEQ(VAR(A,1,INF), VAR(B,1,1)), VAR(C,1,1)) + + Compilation result: + + idx varId depth min max next jump Description + ------------------------------------------------------------ + 0 ALT 0 1 1 1 -1 Alternation start + 1 A(0) 1 1 INF 2 3 Branch 1: A+ + 2 B(1) 1 1 1 4 -1 Branch 1: B -> FIN + 3 C(2) 1 1 1 4 -1 Branch 2: C -> FIN + 4 FIN 0 1 1 -1 -1 Pattern completion + + - idx 0: ALT marker. next(=1) is the start of the first branch + - idx 1: Variable A. next(=2) is B, jump(=3) is the start of the second + branch + - idx 2: Variable B. next(=4) is FIN + - idx 3: Variable C. next(=4) is FIN + - idx 4: FIN marker. Match completion signal + +Roles of next and jump: + + - next: The next element to move to "after consuming" the current element. + For VAR, the next position after a successful match. + For BEGIN/END, the next position inside/outside the group. + + - jump: The element to "skip to." + In ALT, a jump from one branch to the next branch. + In BEGIN, a skip path to END+1 (for groups with min=0). + In END, a loop-back to the start of the group body. + +Example: PATTERN ((A B)+) + + idx varId depth min max next jump Description + -------------------------------------------------------------- + 0 BEGIN 0 1 INF 1 4 Group start + 1 A(0) 1 1 1 2 -1 A + 2 B(1) 1 1 1 3 -1 B + 3 END 0 1 INF 4 1 Group end + 4 FIN 0 1 1 -1 -1 Pattern completion + + - idx 0: BEGIN. next(=1) enters the group body. + jump(=4) skips to after END = FIN (used when min=0). + - idx 3: END. next(=4) exits the group. + jump(=1) loops back to the start of the group body. + +IV-4a. Reluctant Flag (RPR_ELEM_RELUCTANT) + +The reluctant flag is set during Phase 4 (fillRPRPattern) when the AST node +has reluctant == true. It reverses the priority of quantifier expansion at +runtime: + + Greedy (default): try loop-back first, then exit (prefer longer match) + Reluctant: try exit first, then loop-back (prefer shorter match) + +The flag is set on all elements that carry the quantifier: + + Simple VAR (A+?): RPR_ELEM_RELUCTANT on the VAR element + Group ((...)+?): RPR_ELEM_RELUCTANT on BEGIN and END elements + +At runtime (nfa_advance), the flag controls DFS exploration order: + + VAR with quantifier: + Greedy: primary path = next (continue), clone = jump (skip) + Reluctant: primary path = jump (skip), clone = next (continue) + + END element: + Greedy: primary path = jump (loop-back), clone = next (exit) + Reluctant: primary path = next (exit), clone = jump (loop-back) + + BEGIN with min=0: + Greedy: primary path = next (enter group), clone = jump (skip) + Reluctant: primary path = jump (skip), clone = next (enter group) + +The absorption optimization requires greedy quantifiers. Reluctant +quantifiers are excluded from absorbability analysis (see IV-5). + +IV-4b. Empty Loop Flag (RPR_ELEM_EMPTY_LOOP) + +The empty-loop flag is set during Phase 4 (fillRPRPatternGroup) on the END +element when the group body is nullable -- i.e., every path through the +body can match zero rows (all children are nullable). + +Example patterns that trigger this flag: + + (A?)* A is nullable (min=0), so group body is nullable -> END gets flag + (A? B?)+ Both children nullable -> body nullable -> END gets flag + (A | B*) B* is nullable, making the ALT nullable -> END gets flag + +The flag works in conjunction with the empty match cycle detection +(elemIdx visited bitmap). Without this flag, cycle detection alone would +cause legitimate matches to fail. + +Problem example: (A*){2,3} + - Iteration 1: A* consumes all available rows -> count=1, END reached + - Loop-back for iteration 2: A* matches zero rows -> END reached again + - Cycle detection sees the same elemIdx on the same row -> state killed + - count never reaches min(2) -> match fails (incorrect) + +With the RPR_ELEM_EMPTY_LOOP flag, nfa_advance_end creates two paths: +the normal loop-back (which cycle detection will eventually kill) and +a fast-forward exit clone that bypasses the loop entirely. +(See IX-4(c) for detailed runtime behavior.) + +IV-5. Absorbability Analysis (RPR_ELEM_ABSORBABLE) + +Context absorption is an optimization technique that reduces O(n^2) to O(n). +(Runtime behavior is described in Chapter VIII.) + +This phase determines whether the pattern has a structure suitable for the +absorption optimization and sets flags on the relevant elements: + + RPR_ELEM_ABSORBABLE Absorption comparison point + RPR_ELEM_ABSORBABLE_BRANCH Element within an absorbable region + +Eligibility conditions: + + (1) SKIP PAST LAST ROW (not NEXT ROW) + (2) Frame end is UNBOUNDED FOLLOWING + +Structural conditions (isUnboundedStart + computeAbsorbabilityRecursive): + + Case 1: Simple VAR+ (e.g., A+) + -> ABSORBABLE | ABSORBABLE_BRANCH set on the VAR + Case 2: GROUP+ with fixed-length children (min == max, recursively) + e.g., (A B)+, (A B{2})+, ((A (B C){2}){2})+ + -> ABSORBABLE_BRANCH on all elements within the group, + ABSORBABLE | ABSORBABLE_BRANCH on END + + Why this is safe: when every child has min == max, the group + is semantically equivalent to unrolling its body into {1,1} + elements. E.g., (A B{2})+ behaves like (A B B)+. Each + iteration consumes a fixed number of rows, so an earlier + context's count always dominates a later one's (monotonicity). + + Case 3: GROUP+ whose body starts with VAR+ (e.g., (A+ B)+) + -> Recurses from BEGIN into the body, applying Case 1. + ABSORBABLE | ABSORBABLE_BRANCH set on A. + B and END get no flags -> absorption stops once past A. + +Absorbability is determined per-element, not per-pattern. +Absorption comparison is performed only when a state resides at an +element with the RPR_ELEM_ABSORBABLE flag. Once a state leaves the +flagged region, absorption is permanently disabled for that state. + +Through this mechanism, the runtime guarantees monotonicity: +"a context that started earlier always subsumes a context that +started later." + +Chapter V NFA Runtime Data Structures +============================================================================ + +V-1. RPRNFAState -- NFA State + +A single NFA state represents "how far the pattern has progressed." + + Field Description + ----------------------------------------------------------- + elemIdx Index of the current pattern element + counts[] Repetition count per group depth + isAbsorbable Whether the state is in an absorbable region + next Next state in the linked list + +The size of the counts array is rpPattern->maxDepth (= maximum group +nesting depth + 1), allocated as a flexible array member at the end of +the struct. + +Example: In PATTERN ((A B)+ C), a state waiting for B in the 3rd iteration + + Element array: [0:BEGIN(d0) 1:A(d1) 2:B(d1) 3:END(d0) 4:C(d0) 5:FIN] + + elemIdx = 2 (B, depth 1) + counts[0] = 2 (depth 0: depth of END. Group completed 2 iterations) + counts[1] = 1 (depth 1: depth of B. A matched in current iteration) + + Counts are indexed by depth, not by elemIdx. + counts[0] is incremented when passing through END(depth 0), + and the group repetition count is preserved even when + the state is at B(depth 1). + +Definition of two states being "equal": + + Two states are equal if they have the same elemIdx and the same counts + up to the depth of that element. + nfa_states_equal() compares counts[0..elem->depth] using memcmp. + Only counts at or below the depth of the current element are meaningful. + +V-2. RPRNFAContext -- Matching Context + +A single context represents "a matching attempt started from a specific +start row." + + Field Description + --------------------------------------------------------------------- + states Linked list of active NFA states + matchStartRow Row number where matching started + matchEndRow Row number where matching completed + (-1 if incomplete) + lastProcessedRow Last row processed + matchedState State that reached FIN (for greedy fallback) + hasAbsorbableState Whether this context can absorb other contexts + allStatesAbsorbable Whether this context can be absorbed + next, prev Doubly-linked list + +Since the NFA is nondeterministic, multiple states can coexist +simultaneously within a single context. + +Example: In PATTERN (A | B) C, if the first row matches both A and B, +two states coexist within the context: + + State 1: elemIdx=3 (waiting for C, via branch A) + State 2: elemIdx=3 (waiting for C, via branch B) + +In this case, since the (elemIdx, counts) of the two states are equal, +nfa_add_state_unique() retains only State 1 (branch A), which was +added first. +Because DFS processes the first branch of ALT first, the state via A +is registered first, and the state via B is discarded as a duplicate. +This is the preferment guarantee. + +V-3. RPR Fields of WindowAggState + + nfaContext / nfaContextTail Doubly-linked list of active contexts + nfaContextFree Reuse pool for contexts + nfaStateFree Reuse pool for states + nfaVarMatched Per-row cache: varMatched[varId] + nfaVisitedElems Bitmap for cycle detection + nfaStateSize Precomputed size of RPRNFAState + +Memory management: + + States and contexts are managed through their own free lists. + Instead of palloc, they are obtained from the reuse pool, and + returned to the pool upon deallocation. + This reduces the overhead of frequent allocation/deallocation. + +Chapter VI NFA Execution: 3-Phase Model +============================================================================ + +VI-1. Entry Point and Overall Flow + +When the window function processes each row, row_is_in_reduced_frame() +is called. This function determines whether the current row belongs to +a matched frame, and if necessary, calls update_reduced_frame() to +drive the NFA. + +Flow of update_reduced_frame(): + + (1) Find or create a context for the target row + (2) Enter the row processing loop + (3) After the loop ends, record the match result + +Pseudocode of the row processing loop: + + targetCtx = ExecRPRGetHeadContext(pos) + if targetCtx == NULL: + targetCtx = ExecRPRStartContext(pos) + + for currentPos = startPos; targetCtx->states != NULL; currentPos++: + if not nfa_evaluate_row(currentPos): -- row does not exist + ExecRPRFinalizeAllContexts() -- finalize all contexts + ExecRPRCleanupDeadContexts() -- clean up after finalization + break + + ExecRPRProcessRow(currentPos) -- 3-phase processing + ExecRPRStartContext(currentPos + 1) -- pre-create next start point + ExecRPRCleanupDeadContexts() -- remove dead contexts + +Key point: Processing a single row may require processing multiple rows +ahead. Due to the nature of window functions, determining the frame for +row N requires looking at rows beyond N. + +VI-2. Context Creation: ExecRPRStartContext() + +Creates a new context and performs the initial advance. + + (1) Allocate context via nfa_context_alloc() + (2) Set matchStartRow = pos + (3) Create initial state: elemIdx=0 (first pattern element), + counts=all zero + (4) Call nfa_advance(initialAdvance=true) + +The initial advance expands epsilon transitions at the beginning of +the pattern. For example, the initial advance for PATTERN ((A | B) C): + + Start: elemIdx=0 (ALT) + -> Expand ALT branches + -> elemIdx=1 (A) -- VAR, so add state; stop here + -> elemIdx=2 (B) -- VAR, so add state; stop here + + Result: Two states in the context {waiting for A, waiting for B} + +During the initial advance, reaching FIN is not recorded as a match. +This is to prevent empty matches. + +VI-3. Row Evaluation: nfa_evaluate_row() + +Evaluates all variable conditions in the DEFINE clause at once for +the current row. + + for each defineClause[i]: + result = ExecEvalExpr(defineClause[i]) + varMatched[i] = (not null and true) + +To support row navigation operators (PREV, NEXT, FIRST, LAST), +a 1-slot model is used: only ecxt_outertuple is set to the current +row. Navigation is handled by EEOP_RPR_NAV_SET/RESTORE opcodes +emitted during DEFINE expression compilation: + + NAV_SET: save ecxt_outertuple, swap in target row via nav_slot + (evaluate): argument expression reads from swapped slot + NAV_RESTORE: restore original ecxt_outertuple + +Compound navigation (PREV(FIRST()), NEXT(FIRST()), PREV(LAST()), +NEXT(LAST())) is flattened by the parser into a single RPRNavExpr +with a compound kind (RPR_NAV_PREV_FIRST, etc.). The executor +computes the target position in two steps: first the inner reference +point (match_start + N or currentpos - N) with match-range validation, +then the outer adjustment (+/- M) with partition-range validation. +If either step is out of range, the result is NULL. + +nav_slot caches the last fetched position (nav_slot_pos) to avoid +redundant tuplestore lookups when multiple navigation calls target +the same row. + +The varMatched array is referenced later in Phase 1 (Match). + +VI-4. Per-Context Re-evaluation (match_start-dependent variables) + +DEFINE variables that depend on match_start (those containing FIRST, +LAST-with-offset, or compound PREV_FIRST/NEXT_FIRST/PREV_LAST/NEXT_LAST) +are identified at plan time via defineMatchStartDependent. The shared +evaluation in nfa_evaluate_row() uses the head context's matchStartRow +for FIRST/LAST base position. + +When processing a context whose matchStartRow differs from the shared +value, nfa_reevaluate_dependent_vars() temporarily sets nav_match_start +to that context's matchStartRow and re-evaluates only the dependent +variables. The original nav_match_start and currentpos are saved and +restored after re-evaluation. + +Summary of evaluation strategy by navigation content: + + Navigation content evaluation + ------------------------------------------------------- + No navigation shared (once per row) + PREV/NEXT only shared (once per row) + LAST (no offset) shared (once per row) + LAST (with offset) per-context + FIRST (any) per-context + Compound (inner FIRST) per-context + Compound (inner LAST, no off.) shared (once per row) + Compound (inner LAST, w/off.) per-context + +VI-5. Tuplestore Mark and Trim (nodeWindowAgg.c) + +Navigation functions require access to past rows via the tuplestore. +To allow tuplestore_trim() to free rows that are no longer reachable, +the planner computes two offsets (see compute_nav_offsets): + + navMaxOffset (Nav Mark Lookback): + Maximum backward reach from currentpos. Contributed by PREV, + LAST-with-offset, and compound PREV_LAST/NEXT_LAST. + Mark position: currentpos - navMaxOffset. + + navFirstOffset (Nav Mark Lookahead): + Minimum forward offset from match_start. Contributed by FIRST + and compound PREV_FIRST/NEXT_FIRST. Can be negative when + compound PREV_FIRST looks before match_start. + Mark position: oldest_context->matchStartRow + navFirstOffset. + +The actual mark is set to: min(lookback_mark, lookahead_mark). +This ensures all rows reachable by any navigation function are retained. + +When offsets contain non-constant expressions (Param), the planner sets +navMaxOffsetKind/navFirstOffsetKind to RPR_NAV_OFFSET_NEEDS_EVAL and the +executor evaluates them at init time. On overflow, the kind is set to +RPR_NAV_OFFSET_RETAIN_ALL, disabling trim for that dimension. + +VI-6. ExecRPRProcessRow(): 3-Phase Processing + +NFA processing for a single row is divided into three phases: + + +--------------------------------------------+ + | Phase 1: MATCH (convergence) | + | Compare the current row against each VAR | + | state. Remove states that fail to match. | + | | + | Phase 2: ABSORB (absorption) | + | Merge duplicate contexts to prevent | + | state explosion. | + | | + | Phase 3: ADVANCE (expansion) | + | Expand epsilon transitions to prepare | + | for the next row. | + +--------------------------------------------+ + +This ordering is important: + + - Match executes first to "consume the current row." + - Absorb executes immediately after Match, when states have been updated. + - Advance executes last to prepare "states waiting for the next row." + +Chapter VII Phase 1: Match +============================================================================ + +nfa_match() iterates through each state in the context: + + (1) Check whether the state's elemIdx is a VAR element + (2) Compare against the current row using nfa_eval_var_match() + (3) Match success: increment repetition count, retain state + (4) Match failure: remove state + +Match determination (nfa_eval_var_match): + + If varId is within the range of defineVariableList: + Use the value of varMatched[varId] + + If varId exceeds the range (variable not defined in DEFINE): + Unconditionally true (matches all rows) + +Immediate advance for simple VARs: + + For a VAR with min=1, max=1 where the next element is END, + the Match phase processes through END immediately. + This is necessary for accurate state comparison in Phase 2 (Absorb). + + Example: In PATTERN ((A B)+), when A matches, it immediately advances + to B, and when B matches, it immediately advances through END to + complete the group count. This enables absorption comparison with + other contexts. + +Chapter VIII Phase 2: Absorb (Context Absorption) +============================================================================ + +VIII-1. Problem + +In the current implementation, a new context is started for each row +processed. +Applying PATTERN (A+) to 10 rows produces 10 contexts, +each of which tracks state independently. + +If there are N rows, the total number of states becomes O(N^2): + + Context 1 (started at row 1): can match A up to N times + Context 2 (started at row 2): can match A up to N-1 times + ... + Context N (started at row N): can match A 1 time + +VIII-2. Solution: Context Absorption + +Key observation: a context started earlier contains +all matches of a later-started context (monotonicity principle). + +If Context 1 started at row 1 and matched A 5 times, +the state where Context 2 (started at row 2) matched A 4 times +is already contained within Context 1. + +Therefore Context 2 can be "absorbed" into Context 1. + +VIII-3. Absorption Conditions + +Planner-time prerequisites (all must hold for absorption to be enabled): + + (a) SKIP PAST LAST ROW. SKIP TO NEXT ROW creates overlapping + contexts that cannot be safely absorbed. + (b) Unbounded frame (ROWS BETWEEN CURRENT ROW AND UNBOUNDED + FOLLOWING). Limited frames apply differently to each context, + breaking the monotonicity principle. + (c) No match_start-dependent navigation in DEFINE. + + Mechanism: each context has a different matchStartRow, so FIRST + resolves to a different row for each context at the same + currentpos. An earlier context's DEFINE result no longer + subsumes a later one's, making count-dominance comparison + invalid. Rather than comparing matchStartRow at runtime + (which would complicate the absorb path), any match_start + dependency disables absorption entirely. + + Navigation content match_start dep. absorption + ------------------------------------------------------------ + No navigation none safe + PREV/NEXT only none safe + LAST (no offset) none safe + LAST (with offset) boundary check unsafe + FIRST (any) direct unsafe + Compound (inner FIRST) direct unsafe + Compound (inner LAST, no off.) none safe + Compound (inner LAST, w/off.) boundary chk unsafe + +Runtime conditions (evaluated per context pair): + + (1) The pattern is marked as isAbsorbable (see IV-5) + (2) allStatesAbsorbable of the target context is true + (3) An earlier context "covers" all states of the target + +Cover condition (nfa_states_covered): + + A state with the same elemIdx exists in the earlier context, + and the count at that depth is greater than or equal -- then it is covered. + +VIII-4. Dual-Flag Design + +Two boolean flags make the absorption decision efficient: + + hasAbsorbableState (monotonic: only true->false transition possible) + "Does this context have the ability to absorb other contexts?" + true if at least one absorbable state exists. + Transitions to false when states are removed leaving no absorbable + states. + Once false, it never becomes true again. + + allStatesAbsorbable (dynamic: can fluctuate) + "Can this context be absorbed?" + true if all states are in an absorbable region. + Becomes false when a non-absorbable state is added; reverts to true + when it is removed. + +VIII-5. Absorption Order + +nfa_absorb_contexts() traverses from tail (newest) to head (oldest). + + for ctx = tail to head: + if ctx.allStatesAbsorbable: + for older = ctx.prev to head: + if older.hasAbsorbableState: + if nfa_states_covered(older, ctx): + free(ctx) -- absorbed + break + +Since inspection starts from the newest context, the most recently started +(= having the shortest match) context is absorbed first. + +Chapter IX Phase 3: Advance (Epsilon Transition Expansion) +============================================================================ + +IX-1. Overview + +nfa_advance() expands epsilon transitions from each state after Match, +generating "new states waiting for the next row." + +An epsilon transition is a transition that moves without consuming a row: + + - ALT: branch to each alternative + - BEGIN: enter group (or skip if min=0) + - END: loop-back within group (or exit when condition is met) + - FIN: record match completion + - VAR loop/exit: repeat/exit according to the quantifier + +Expansion stops upon reaching a VAR element, and the state is added. +This is because VAR is the element that "will consume the next row." + +IX-2. Processing Order: DFS and Preferment + +advance processes states in lexicographic order, +performing Depth-First Search (DFS) on each state. + +This DFS order is what guarantees the SQL standard's "preferment": + + The branch that appears first in the PATTERN text takes precedence. + +Example: PATTERN (A | B) C + + The first branch A of the ALT takes precedence over the second branch B. + When both A and B can match, the match via A is selected. + +nfa_add_state_unique() prevents duplicate addition of the same state, +so the state added first (= from the preferred branch) is retained. + +IX-3. Routing Function: nfa_route_to_elem() + +All inter-element transitions in the advance phase go through +nfa_route_to_elem(). +This function branches its behavior based on the type of the next element: + + If the next element is VAR: + (1) Add the state to the context (nfa_add_state_unique) + (2) If the VAR has min=0, also add a skip path (recurse via next) + -> Expansion stops here (VAR is the element that "will consume the next + row") + + If the next element is non-VAR (ALT, BEGIN, END, FIN): + -> Recursively call nfa_advance_state() to continue expansion + +With this structure, advance recursively follows epsilon transitions +until reaching a VAR, consistently stopping only at VAR elements. + +IX-4. Per-Element advance Behavior + +(a) ALT (nfa_advance_alt) + + Upon encountering an ALT element, all branches are expanded in order. + The first element of each branch is connected via a jump pointer. + + idx=0 (ALT) -> branch 1 start (next) -> branch 2 start (jump) -> ... + + nfa_advance_state() is recursively called for each branch. + +(b) BEGIN (nfa_advance_begin) + + Handles group entry. + jump points to the element after END (= first element outside the group). + + Greedy (default): + (1) Enter the group body (move via next, reset the count at that depth) + (2) If min=0, also add a group skip path (move via jump) + + Reluctant: + Order reversed -- skip path first, group entry second. + If the skip path reaches FIN, the group entry path is not generated + (shortest match preferred). + +(c) END (nfa_advance_end) + + Handles group termination. This is the core of the repetition logic. + + Let count be the count at the current depth: + + count < min: + Loop-back (move via jump, repeat the group body) + + If the RPR_ELEM_EMPTY_LOOP flag is set: + In addition to loop-back, also add a fast-forward exit path. + This is because the body may produce an empty match, causing count + to never reach min. fast-forward resets counts[depth] to 0 + and exits via next (treating the remaining required iterations + as empty matches). + + min <= count < max: + Greedy: loop-back first, exit second + Reluctant: exit first, loop-back second + If the exit path reaches FIN, loop-back is omitted. + + count >= max: + Unconditional exit (move via next) + + On exit: reset counts[depth] = 0, and if the next element is an outer END, + increment the count at the outer depth. + +(d) VAR (nfa_advance_var) + + Handles repeat/exit for a VAR element with a quantifier. + + Let count be the count at the current depth: + + count < min: + Unconditional loop (stay at the same elemIdx, wait for the next row) + + min <= count < max: + Greedy: loop first, exit (next) second + Reluctant: exit first, loop second + If the exit path reaches FIN, loop is omitted. + + count >= max: + Unconditional exit (move via next) + + On exit: reset counts[depth] = 0. + +(e) FIN + + Match success. The current state is moved to matchedState for recording, + and matchEndRow is set to the current row. + + Upon reaching FIN, all remaining unprocessed states are removed + (early termination). By DFS order, the path that reached FIN first + has the highest preferment, so the rest are inferior paths. + This is the core mechanism that guarantees preferment. + + In SKIP PAST LAST ROW mode, upon reaching FIN, subsequent contexts + that started within the match range are immediately pruned. + +IX-5. State Deduplication: nfa_add_state_unique() + +When adding a new state to a context, it is compared against existing +states; +if an identical state already exists, it is not added. + +Comparison criteria: elemIdx + counts[0..elem->depth] (see V-1) + +This deduplication is the core mechanism that suppresses NFA state +explosion. +Because DFS order causes preferred-branch states to be added first, +identical states from lower-priority branches are automatically discarded. + +IX-6. Cycle Detection: nfaVisitedElems + +When a group body can produce an empty match, +looping back from END may cause an infinite loop. + +Example: PATTERN ((A?)*) + + A? has min=0, so it can pass through without matching. + If the outer group repeats: BEGIN -> A? skip -> END -> BEGIN -> ... + +To prevent this: + + (1) At compile time: set the RPR_ELEM_EMPTY_LOOP flag on the END + of groups whose body is nullable. + The runtime effect of this flag is described in IX-4(c): + when count < min, a fast-forward exit path is added, + resolving the deadlock where count cannot increase due to empty + matches. + + (2) At runtime: initialize the nfaVisitedElems bitmap immediately before + DFS expansion of each state within advance (once per state). + During DFS, epsilon elements (END, ALT, BEGIN) are marked in the + bitmap at nfa_advance_state entry. VAR elements are marked later + when added to the state list (nfa_add_state_unique), so that + legitimate loop-back to the same VAR in a new group iteration + (e.g., END -> ALT -> same VAR) is not blocked. + If a previously visited elemIdx is revisited, that path is terminated. + + Note: the bitmap tracks only elemIdx and does not consider counts. + Therefore, legitimate revisits to the same elemIdx but with different + counts may also be blocked. This only occurs when the group body is + nullable (all paths can match empty), causing END -> loop-back -> + skip -> END within a single DFS. In such cases the END element has + the RPR_ELEM_EMPTY_LOOP flag, so the fast-forward exit (IX-4(c)) + provides an alternative path that bypasses the cycle. + +Chapter X Match Result Processing +============================================================================ + +X-1. Match Result + +RPR tracks the current match result as a single entry in WindowAggState +with four fields: rpr_match_valid, rpr_match_matched, rpr_match_start, +and rpr_match_length. When rpr_match_valid is true, the entry describes +the match result for the position at rpr_match_start: rpr_match_matched +indicates success or failure, and rpr_match_length gives the number of +rows consumed. A match with rpr_match_length 0 represents an empty match +(pattern matched but consumed no rows). When rpr_match_valid is false, +the position has not been evaluated yet (RF_NOT_DETERMINED). + +A row's status against the current match result can be obtained by +calling get_reduced_frame_status(). + +X-2. AFTER MATCH SKIP + +Determines the starting point for the next match attempt after a successful +match: + + SKIP TO NEXT ROW: + New match attempt begins from the row after the match start row. + Overlapping matches are possible. + + SKIP PAST LAST ROW: + New match attempt begins from the row after the match end row. + Only non-overlapping matches are possible. + +X-3. INITIAL vs SEEK + + Standard definition (section 6.12): + INITIAL: "is used to look for a match whose first row is R." + SEEK: "is used to permit a search for the first match anywhere + from R through the end of the full window frame." + In either case, if there is no match, the reduced window frame is empty. + The default is INITIAL. + + Current implementation: + SEEK is not supported (the parser raises an error). + Only INITIAL is supported, searching only for matches starting at each + row position pos. + +X-4. Bounded Frame Handling + + When the frame is bounded (e.g., ROWS BETWEEN CURRENT ROW AND 5 + FOLLOWING), ExecRPRProcessRow receives hasLimitedFrame=true and + frameOffset indicating the upper bound. Before the match phase, + any context whose match has exceeded the frame boundary + (currentPos >= matchStartRow + frameOffset + 1) is finalized early + by forcing a mismatch. This prevents matches from extending beyond + the window frame. The sum is clamped to PG_INT64_MAX on overflow. + + Note that bounded frames also disable context absorption at the + planner level (see VIII-3(b)), since the frame boundary breaks the + monotonicity assumption required for correct absorption. + +Chapter XI Worked Example: Full Execution Trace +============================================================================ + +XI-1. Query + + SELECT company, tdate, price, + first_value(price) OVER w AS start_price, + last_value(price) OVER w AS end_price + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS price > PREV(price), + B AS price < PREV(price) + ); + +XI-2. Data + + Row# tdate price + -------------------------- + 0 2024-01-01 100 + 1 2024-01-02 110 + 2 2024-01-03 120 + 3 2024-01-04 115 + 4 2024-01-05 130 + +XI-3. Compilation Result + + PATTERN (A+ B) -> unchanged after optimization + + idx varId depth min max next jump + ----------------------------------------- + 0 A(0) 0 1 INF 1 -1 A+ + 1 B(1) 0 1 1 2 -1 B + 2 FIN 0 1 1 -1 -1 + + DEFINE: A -> "price > PREV(price)", B -> "price < PREV(price)" + isAbsorbable = true (A+ is a simple unbounded VAR) + +XI-4. Execution Trace + +--- Row 0 (price=100) --- + + update_reduced_frame(0) called. + + Context C0 created (matchStartRow=0). + Initial advance: elemIdx=0(A) -> VAR, so state is added. + C0.states = [{elemIdx=0, counts=[0]}] + + nfa_evaluate_row(0): + A: price(100) > PREV(price) -> no PREV -> false + B: price(100) < PREV(price) -> no PREV -> false + varMatched = [false, false] + + ExecRPRProcessRow(0): + Phase 1 (Match): A(0) state vs varMatched[0]=false -> state removed + C0.states = [] (empty) + + Phase 2 (Absorb): skipped (no states) + Phase 3 (Advance): skipped (no states) + + C0.states is empty, so the loop terminates. + matchEndRow < matchStartRow -> unmatched. + +--- Row 1 (price=110) --- + + update_reduced_frame(1) called. + + Context C1 created (matchStartRow=1). + Initial advance: C1.states = [{elemIdx=0, counts=[0]}] + + nfa_evaluate_row(1): + A: 110 > PREV(100) -> true + B: 110 < PREV(100) -> false + varMatched = [true, false] + + ExecRPRProcessRow(1): + Phase 1 (Match): A(0) match succeeds -> counts[0]++ -> counts=[1] + C1.states = [{elemIdx=0, counts=[1]}] + + Phase 3 (Advance): + State {elemIdx=0, counts=[1]}: A+ (min=1, count=1, max=INF) + count >= min, so: + Greedy -> loop first: keep {elemIdx=0, counts=[1]} + exit: reset counts[0]=0, next(=1) -> {elemIdx=1, + counts=[0]} + C1.states = [{elemIdx=0, counts=[1]}, {elemIdx=1, counts=[0]}] + +--- Row 2 (price=120) --- + + Context C2 created (matchStartRow=2). + Initial advance: C2.states = [{elemIdx=0, counts=[0]}] + + nfa_evaluate_row(2): + A: 120 > PREV(110) -> true + B: 120 < PREV(110) -> false + varMatched = [true, false] + + C1 ExecRPRProcessRow(2): + Phase 1 (Match): + {elemIdx=0, counts=[1]}: A matches -> counts=[2] + {elemIdx=1, counts=[0]}: B does not match -> removed + C1.states = [{elemIdx=0, counts=[2]}] + + C2 ExecRPRProcessRow(2): + Phase 1 (Match): + {elemIdx=0, counts=[0]}: A matches -> counts=[1] + C2.states = [{elemIdx=0, counts=[1]}] + + Phase 2 (Absorb): + Does C1 (started earlier) cover C2? + C1: {elemIdx=0, counts=[2]}, C2: {elemIdx=0, counts=[1]} + Same elemIdx, C1.counts >= C2.counts -> covered + C2 absorbed. -> removed. + + Phase 3 (Advance): + {elemIdx=0, counts=[2]}: Greedy -> loop + exit + Loop: {elemIdx=0, counts=[2]} + Exit: reset counts[0]=0, next(=1) -> {elemIdx=1, counts=[0]} + C1.states = [{elemIdx=0, counts=[2]}, {elemIdx=1, counts=[0]}] + + Context C3 created (matchStartRow=3). + +--- Row 3 (price=115) --- + + nfa_evaluate_row(3): + A: 115 > PREV(120) -> false + B: 115 < PREV(120) -> true + varMatched = [false, true] + + ExecRPRProcessRow(3): + Phase 1 (Match): + {elemIdx=0, counts=[2]}: A does not match -> removed + {elemIdx=1, counts=[0]}: B matches -> counts=[1] + C1.states = [{elemIdx=1, counts=[1]}] + + Phase 3 (Advance): + {elemIdx=1, counts=[1]}: B (min=1, max=1) + count(1) >= max(1) -> unconditional exit + Reset counts[0]=0, next = 2 (FIN) + FIN reached -> matchEndRow = 3, matchedState recorded. + Early termination: no remaining states, so completed immediately. + C1.states = [] (empty after reaching FIN) + + C1.states is empty and matchEndRow=3 >= matchStartRow=1 -> match succeeds. + + rpr_match_start = 1, rpr_match_length = 3 + +--- Row 4 (price=130) --- + + update_reduced_frame(4) called. + C3 was already created but matchStartRow=3, so it is not applicable. + New context C4 created (matchStartRow=4). + + nfa_evaluate_row(4): + A: 130 > PREV(115) -> true + B: 130 < PREV(115) -> false + + ... No subsequent rows, so ExecRPRFinalizeAllContexts() is called. + Match incomplete -> unmatched. + +XI-5. Final Result + + Row 0: unmatched -> frame = the row itself + Row 1: match head -> frame = rows 1 through 3 + Row 2: inside match -> skipped + Row 3: inside match -> skipped + Row 4: unmatched -> frame = the row itself + +Chapter XII Summary of Key Design Decisions +============================================================================ + +XII-1. Flat Array vs Tree-Based NFA + + Choice: Flat array (RPRPatternElement[]) + + Rationale: + - Cache-friendly: 16-byte fixed size, contiguous memory + - Index-based references: 2-byte indices instead of pointers + - Easy to serialize: can use memcpy when passing to plan nodes + +XII-2. Forward-only Execution vs Backtracking + + Choice: Forward-only (state set tracking) + + Rationale: + - Backtracking takes exponential time in the worst case + - NFA simulation guarantees polynomial time + - DFS order naturally guarantees preferment. + Greedy/reluctant per quantifier requires only reversing the DFS order + - Window functions receive sorted rows sequentially. + Forward-only fits directly into this pipeline, + whereas backtracking requires re-fetching previous rows + - DEFINE conditions are SQL expressions (PREV, RUNNING aggregates, etc.) + with high re-evaluation cost. Forward-only requires only one evaluation + per row + +XII-3. Per-Context Management + + Choice: Independent context per start row + + Rationale: + - Supports overlapping matches under SKIP TO NEXT ROW + - Determines the frame for each row independently + - Absorption optimization can eliminate redundant contexts in O(n) + +XII-4. Memory Pool Management + + Choice: Custom free list + + Rationale: + - NFA states are created and destroyed in large numbers per row + - Avoids palloc/pfree overhead + - State size is variable (counts[] array), but within a single query + maxDepth is fixed, so all states have the same size + +XII-5. Execution Optimization Summary + + The following optimizations make the NFA simulation practical. + + -- Compile-time -- + + (1) AST Optimization (IV-3) + + Simplifies the AST before converting the pattern to an NFA. + Reduces the number of NFA elements through consecutive variable + merging (A A -> A{2}), SEQ flattening, quantifier multiplication, + and other transformations. + + Significance: Reducing the element count directly shrinks the state + space, decreasing the cost of all subsequent runtime phases (match, + absorb, advance). + + -- Runtime: advance phase -- + + (2) Group Skip (IX-4(b)) + + At the BEGIN of a group with min=0, uses jump to skip the entire + group. Moves directly to the first element outside the group without + exploring the group body. Greedy enters then skips; Reluctant skips + then enters. + + Significance: For optional groups (min=0), immediately generates + a skip path without exploring the body, avoiding unnecessary DFS + expansion. + + (3) State Deduplication (IX-5) + + During advance, DFS may generate states with the same (elemIdx, + counts) combination through multiple paths. Additionally, for + group absorption, nfa_match performs inline advance from bounded + VARs (count >= max) within the absorbable region (ABSORBABLE_BRANCH) + through END chains to reach the judgment point (ABSORBABLE END). + This process can also produce duplicate states reaching the same END. + nfa_add_state_unique() blocks duplicate addition of identical states + in both cases. + + Significance: Prevents exponential growth of the state count in + ALT branches and quantifier expansion. Since DFS order causes the + preferred branch's state to be registered first, identical states + from lower-priority branches are automatically discarded, thereby + also guaranteeing preferment. + + (4) Cycle Detection and Fast-Forward (IX-6, IX-4(c)) + + When a nullable group body (e.g., A?) repeats empty matches, + the END -> BEGIN loop-back can continue indefinitely. + + Two mechanisms resolve this: + - A visited bitmap (nfaVisitedElems) blocks revisitation of the + same element, preventing infinite loops (safety) + - At an END with the RPR_ELEM_EMPTY_LOOP flag set, when + count < min, the remaining required iterations are treated as + empty matches and a fast-forward exit path out of the group is + added (correctness) + + Significance: Cycle detection guarantees termination, and + fast-forward guarantees that the min condition is satisfied. + Without these, patterns containing nullable groups would fall + into infinite loops or fail to match. + + (5) Match Pruning (IX-4(e)) + + When a state reaches FIN during advance, all remaining unprocessed + states of that context are removed. Because of DFS order, the path + that reaches FIN first has the highest preferment, so the remaining + paths are inferior. + + Significance: Once the best match is determined, exploration of + inferior paths is immediately terminated. This mechanism achieves + both preferment guarantees and performance optimization. + + -- Runtime: inter-context -- + + (6) Early Termination (SKIP PAST LAST ROW) + + In SKIP PAST LAST ROW mode, when a match is found, subsequent + contexts whose start rows fall within the match range are pruned + immediately without further processing. + In SKIP TO NEXT ROW mode, overlapping contexts are preserved + because each row requires its own independent match. + + Significance: Prunes subsequent contexts whose start rows overlap + with a prior match range, avoiding unnecessary processing. + + (7) Context Absorption (Chapter VIII) + + If an independent context is created for each row, O(n^2) states + accumulate. By exploiting the monotonicity that an earlier-started + context subsumes the states of a later-started context, redundant + contexts are eliminated early. + + Absorbability is determined per-element; comparison is performed + only at elements with the RPR_ELEM_ABSORBABLE flag (see IV-5). + + Significance: Keeps the number of active contexts at a constant + level, achieving O(n^2) -> O(n) time complexity. Without this, + performance degrades sharply on long partitions. + +Appendix A. Key Function Index +============================================================================ + + Function File Role + -------------------------------------------------------------------------- + transformRPR parse_rpr.c Parser entry point + transformDefineClause parse_rpr.c DEFINE transformation + collectPatternVariables rpr.c Variable collection + buildDefineVariableList rpr.c DEFINE variable list + buildRPRPattern rpr.c NFA compilation main + optimizeRPRPattern rpr.c AST optimization + fillRPRPattern rpr.c NFA element generation + finalizeRPRPattern rpr.c Finalization + computeAbsorbability rpr.c Absorption analysis + update_reduced_frame nodeWindowAgg.c Execution main loop + nfa_evaluate_row nodeWindowAgg.c DEFINE evaluation + ExecRPRStartContext execRPR.c Context creation + ExecRPRProcessRow execRPR.c 3-phase processing + nfa_match execRPR.c Phase 1 + nfa_absorb_contexts execRPR.c Phase 2 + nfa_advance execRPR.c Phase 3 + nfa_advance_state execRPR.c Per-state branching + nfa_route_to_elem execRPR.c Element routing + nfa_advance_alt execRPR.c ALT handling + nfa_advance_begin execRPR.c BEGIN handling + nfa_advance_end execRPR.c END handling + nfa_advance_var execRPR.c VAR handling + nfa_add_state_unique execRPR.c Deduplication + nfa_states_covered execRPR.c Absorption check + nfa_reevaluate_dependent_vars execRPR.c Per-context re-eval + ExecRPRGetHeadContext execRPR.c Context lookup + ExecRPRFreeContext execRPR.c Context deallocation + ExecRPRCleanupDeadContexts execRPR.c Dead context cleanup + ExecRPRFinalizeAllContexts execRPR.c Partition-end finalize + ExecRPRRecordContextSuccess execRPR.c Stats: match success + ExecRPRRecordContextFailure execRPR.c Stats: match failure + compute_nav_offsets createplan.c Trim offset computation + +Appendix B. Data Structure Relationship Diagram +============================================================================ + + Parser Layer + -------- + RPCommonSyntax + |--- rpSkipTo: RPSkipTo + |--- initial: bool + +--- rpPattern: RPRPatternNode* (tree) + |--- nodeType: VAR | SEQ | ALT | GROUP + |--- min, max: quantifier + |--- varName: variable name (VAR only) + +--- children: List* (SEQ/ALT/GROUP only) + + Planner Layer + ---------- + WindowAgg (plan node) + |--- rpSkipTo: RPSkipTo + |--- defineClause: List + +--- rpPattern: RPRPattern* + |--- numVars: int + |--- varNames: char** + |--- maxDepth: RPRDepth + |--- isAbsorbable: bool + |--- numElements: int + +--- elements: RPRPatternElement[] (flat array) + |--- varId (1B) + |--- depth (1B) + |--- flags (1B) + |--- reserved (1B) + |--- min, max (4B + 4B) + +--- next, jump (2B + 2B) + + Executor Layer + ---------- + WindowAggState + |--- rpSkipTo: RPSkipTo (AFTER MATCH SKIP mode) + |--- rpPattern: RPRPattern* (copied from plan) + |--- defineVariableList: List (variable names, DEFINE order) + |--- defineClauseList: List + |--- nfaVarMatched: bool[] (per-row cache) + |--- nfaVisitedElems: bitmapword* (cycle detection) + |--- nfaStateSize: Size (pre-calculated RPRNFAState allocation size) + |--- nfaContext <-> nfaContextTail (doubly-linked list) + | +--- RPRNFAContext + | |--- states: RPRNFAState* (linked list) + | | |--- elemIdx + | | |--- counts[] + | | +--- isAbsorbable + | |--- matchStartRow, matchEndRow + | |--- lastProcessedRow + | |--- matchedState (cloned on FIN arrival) + | |--- hasAbsorbableState + | +--- allStatesAbsorbable + |--- nfaContextFree (recycling pool) + +--- nfaStateFree (recycling pool) + +Appendix C. NFA Element Array Examples +============================================================================ + +C-1. PATTERN (A B C) + + idx varId depth min max next jump + ------------------------------------------ + 0 A 0 1 1 1 -1 + 1 B 0 1 1 2 -1 + 2 C 0 1 1 3 -1 + 3 FIN 0 1 1 -1 -1 + +C-2. PATTERN (A+ B*) + + idx varId depth min max next jump flags + ------------------------------------------------------------------------ + 0 A 0 1 INF 1 -1 ABSORBABLE | ABSORBABLE_BRANCH + 1 B 0 0 INF 2 -1 + 2 FIN 0 1 1 -1 -1 + + Only A+ is the absorption point (Case 1). Once past A, + absorption is permanently disabled for that state. + +C-3. PATTERN (A | B | C) + + idx varId depth min max next jump + ---------------------------------------- + 0 ALT 0 1 1 1 -1 alternation start + 1 A 1 1 1 4 2 branch 1 -> FIN, jump -> branch 2 + 2 B 1 1 1 4 3 branch 2 -> FIN, jump -> branch 3 + 3 C 1 1 1 4 -1 branch 3 -> FIN + 4 FIN 0 1 1 -1 -1 + +C-4. PATTERN ((A B)+ C) + + idx varId depth min max next jump flags + -------------------------------------------------------------------------- + 0 BEGIN 0 1 INF 1 4 ABSORBABLE_BRANCH + 1 A 1 1 1 2 -1 ABSORBABLE_BRANCH + 2 B 1 1 1 3 -1 ABSORBABLE_BRANCH + 3 END 0 1 INF 4 1 ABSORBABLE | ABSORBABLE_BRANCH + 4 C 0 1 1 5 -1 + 5 FIN 0 1 1 -1 -1 + + Case 2: GROUP+ with {1,1} body VARs. A, B are branches; + END is the absorption point. Compare with C-6 (Case 3). + +C-5. PATTERN ((A | B)+? C) + + idx varId depth min max next jump flags + ------------------------------------------------------------------- + 0 BEGIN 0 1 INF 1 5 RELUCTANT, group start + 1 ALT 1 1 1 2 -1 alternation start + 2 A 2 1 1 4 3 branch 1 + 3 B 2 1 1 4 -1 branch 2 + 4 END 0 1 INF 5 1 RELUCTANT, group end + 5 C 0 1 1 6 -1 + 6 FIN 0 1 1 -1 -1 + +C-6. PATTERN ((A+ B)+ C) -- Absorbability flag example + + idx varId depth min max next jump flags + --------------------------------------------------------------------------- + 0 BEGIN 0 1 INF 1 4 ABSORBABLE_BRANCH, group start + 1 A 1 1 INF 2 -1 ABSORBABLE | ABSORBABLE_BRANCH + 2 B 1 1 1 3 -1 + 3 END 0 1 INF 4 1 group end + 4 C 0 1 1 5 -1 + 5 FIN 0 1 1 -1 -1 + + Recurses from BEGIN into the body -> A matches Case 1 (simple VAR+). + A gets ABSORBABLE | ABSORBABLE_BRANCH, BEGIN gets ABSORBABLE_BRANCH. + B and END get no flags -> absorption stops once the state advances to B. + (See IV-5 Case 3) + +C-7. PATTERN ((A+ B | C*)+ D) -- Per-branch absorption in ALT + + idx varId depth min max next jump flags + --------------------------------------------------------------------------- + 0 BEGIN 0 1 INF 1 6 ABSORBABLE_BRANCH + 1 ALT 1 1 1 2 -1 ABSORBABLE_BRANCH + 2 A 2 1 INF 3 4 ABSORBABLE | ABSORBABLE_BRANCH + 3 B 2 1 1 5 -1 + 4 C 2 0 INF 5 -1 ABSORBABLE | ABSORBABLE_BRANCH + 5 END 0 1 INF 6 1 EMPTY_LOOP + 6 D 0 1 1 7 -1 + 7 FIN 0 1 1 -1 -1 + + ALT branches are checked independently for absorbability. + Branch 1: A+ matches Case 1 -> A gets ABSORBABLE. B has no flag. + Branch 2: C* matches Case 1 -> C gets ABSORBABLE. + Both A and C get ABSORBABLE_BRANCH as part of their respective branch + paths. + END has EMPTY_LOOP: branch 2 (C*) is nullable, making the group body + nullable. + BEGIN and ALT get ABSORBABLE_BRANCH (on the path to absorbable elements). + +============================================================================ + End of document +============================================================================ diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index 77229141b38..8d8da67e79f 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -1222,6 +1222,98 @@ ExecInitExprRec(Expr *node, ExprState *state, break; } + case T_RPRNavExpr: + { + /* + * RPR navigation functions (PREV/NEXT/FIRST/LAST) are + * compiled into EEOP_RPR_NAV_SET / EEOP_RPR_NAV_RESTORE + * opcodes instead of a normal function call. The SET opcode + * swaps ecxt_outertuple to the target row, the argument + * expression is compiled normally (reads from the swapped + * slot), and the RESTORE opcode restores the original slot. + * + * Default offset when offset_arg is NULL: PREV/NEXT: 1 + * (physical offset from currentpos) FIRST/LAST: 0 (logical + * offset from match boundary) + */ + RPRNavExpr *nav = (RPRNavExpr *) node; + WindowAggState *winstate; + + Assert(state->parent && IsA(state->parent, WindowAggState)); + winstate = (WindowAggState *) state->parent; + + /* Emit SET opcode: swap slot to target row */ + scratch.opcode = EEOP_RPR_NAV_SET; + scratch.d.rpr_nav.winstate = winstate; + scratch.d.rpr_nav.kind = nav->kind; + + if (nav->kind >= RPR_NAV_PREV_FIRST) + { + /* + * Compound navigation: allocate array of 2 for inner [0] + * and outer [1] offsets. + */ + Datum *offset_values = palloc_array(Datum, 2); + bool *offset_isnulls = palloc_array(bool, 2); + + /* Inner offset (default 0 for FIRST/LAST) */ + if (nav->offset_arg != NULL) + ExecInitExprRec(nav->offset_arg, state, + &offset_values[0], &offset_isnulls[0]); + else + { + offset_values[0] = Int64GetDatum(0); + offset_isnulls[0] = false; + } + + /* Outer offset (default 1 for PREV/NEXT) */ + if (nav->compound_offset_arg != NULL) + ExecInitExprRec(nav->compound_offset_arg, state, + &offset_values[1], &offset_isnulls[1]); + else + { + offset_values[1] = Int64GetDatum(1); + offset_isnulls[1] = false; + } + + scratch.d.rpr_nav.offset_value = offset_values; + scratch.d.rpr_nav.offset_isnull = offset_isnulls; + } + else if (nav->offset_arg != NULL) + { + /* Simple navigation with explicit offset */ + Datum *offset_value = palloc_object(Datum); + bool *offset_isnull = palloc_object(bool); + + ExecInitExprRec(nav->offset_arg, state, + offset_value, offset_isnull); + scratch.d.rpr_nav.offset_value = offset_value; + scratch.d.rpr_nav.offset_isnull = offset_isnull; + } + else + { + /* Simple navigation with default offset */ + scratch.d.rpr_nav.offset_value = NULL; + scratch.d.rpr_nav.offset_isnull = NULL; + } + + ExprEvalPushStep(state, &scratch); + + /* Compile the argument expression normally */ + ExecInitExprRec(nav->arg, state, resv, resnull); + + /* Emit RESTORE opcode: restore original slot */ + scratch.opcode = EEOP_RPR_NAV_RESTORE; + scratch.resvalue = resv; + scratch.resnull = resnull; + scratch.d.rpr_nav.winstate = winstate; + get_typlenbyval(nav->resulttype, + &scratch.d.rpr_nav.resulttyplen, + &scratch.d.rpr_nav.resulttypbyval); + ExprEvalPushStep(state, &scratch); + break; + } + case T_FuncExpr: { FuncExpr *func = (FuncExpr *) node; diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 0634af964a9..324b9a962a8 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -57,11 +57,13 @@ #include "postgres.h" #include "access/heaptoast.h" +#include "common/int.h" #include "access/tupconvert.h" #include "catalog/pg_type.h" #include "commands/sequence.h" #include "executor/execExpr.h" #include "executor/nodeSubplan.h" +#include "executor/nodeWindowAgg.h" #include "funcapi.h" #include "miscadmin.h" #include "nodes/miscnodes.h" @@ -586,6 +588,8 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull) &&CASE_EEOP_WINDOW_FUNC, &&CASE_EEOP_MERGE_SUPPORT_FUNC, &&CASE_EEOP_SUBPLAN, + &&CASE_EEOP_RPR_NAV_SET, + &&CASE_EEOP_RPR_NAV_RESTORE, &&CASE_EEOP_AGG_STRICT_DESERIALIZE, &&CASE_EEOP_AGG_DESERIALIZE, &&CASE_EEOP_AGG_STRICT_INPUT_CHECK_ARGS, @@ -2013,6 +2017,24 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull) EEO_NEXT(); } + /* RPR navigation: swap slot to target row */ + EEO_CASE(EEOP_RPR_NAV_SET) + { + ExecEvalRPRNavSet(state, op, econtext); + outerslot = econtext->ecxt_outertuple; + + EEO_NEXT(); + } + + /* RPR navigation: restore slot to original row */ + EEO_CASE(EEOP_RPR_NAV_RESTORE) + { + ExecEvalRPRNavRestore(state, op, econtext); + outerslot = econtext->ecxt_outertuple; + + EEO_NEXT(); + } + /* evaluate a strict aggregate deserialization function */ EEO_CASE(EEOP_AGG_STRICT_DESERIALIZE) { @@ -5988,3 +6010,248 @@ ExecAggPlainTransByRef(AggState *aggstate, AggStatePerTrans pertrans, MemoryContextSwitchTo(oldContext); } + +/* + * Extract compound (outer) offset from step data. + * For compound nav, offset_value is an array: [0]=inner, [1]=outer. + * Returns the outer offset; errors on NULL or negative. + * Default is 1 (like PREV/NEXT implicit offset). + */ +static int64 +rpr_nav_get_compound_offset(ExprEvalStep *op) +{ + int64 val; + + Assert(op->d.rpr_nav.offset_value != NULL); + + if (op->d.rpr_nav.offset_isnull[1]) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("row pattern navigation offset must not be null"))); + + val = DatumGetInt64(op->d.rpr_nav.offset_value[1]); + + if (val < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("row pattern navigation offset must not be negative"))); + + return val; +} + +/* + * Evaluate RPR navigation (PREV/NEXT/FIRST/LAST): swap slot to target row. + * + * Saves the current outertuple into winstate for later restore, computes + * the target row position, fetches the corresponding slot from the + * tuplestore, and replaces econtext->ecxt_outertuple with it. + * + * This is called both from the interpreter inline handler and from + * JIT-compiled expressions via build_EvalXFunc. + */ +void +ExecEvalRPRNavSet(ExprState *state, ExprEvalStep *op, ExprContext *econtext) +{ + WindowAggState *winstate = op->d.rpr_nav.winstate; + int64 offset; + int64 target_pos; + TupleTableSlot *target_slot; + + /* Save current slot for later restore */ + winstate->nav_saved_outertuple = econtext->ecxt_outertuple; + + /* + * Determine the inner offset. NULL or negative offsets are errors per + * the SQL standard. + * + * Default offset when offset_arg is NULL: PREV/NEXT: 1 (standard 5.6.2) + * FIRST/LAST and compound: 0 for inner, 1 for outer + */ + if (op->d.rpr_nav.offset_value != NULL) + { + if (*op->d.rpr_nav.offset_isnull) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("row pattern navigation offset must not be null"))); + + offset = DatumGetInt64(*op->d.rpr_nav.offset_value); + + if (offset < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("row pattern navigation offset must not be negative"))); + } + else + { + /* Default offset: 1 for simple PREV/NEXT, 0 otherwise */ + if (op->d.rpr_nav.kind == RPR_NAV_PREV || + op->d.rpr_nav.kind == RPR_NAV_NEXT) + offset = 1; + else + offset = 0; + } + + /* + * Calculate target position based on navigation direction. On overflow, + * use -1 so that ExecRPRNavGetSlot treats it as out of range. + */ + switch (op->d.rpr_nav.kind) + { + case RPR_NAV_PREV: + if (pg_sub_s64_overflow(winstate->currentpos, offset, &target_pos)) + target_pos = -1; + break; + case RPR_NAV_NEXT: + if (pg_add_s64_overflow(winstate->currentpos, offset, &target_pos)) + target_pos = -1; + break; + case RPR_NAV_FIRST: + /* FIRST: offset from match_start, clamped to currentpos */ + if (pg_add_s64_overflow(winstate->nav_match_start, offset, &target_pos)) + target_pos = -1; + else if (target_pos > winstate->currentpos) + target_pos = -1; /* beyond current match range */ + break; + case RPR_NAV_LAST: + /* LAST: offset backward from currentpos, clamped to match_start */ + if (pg_sub_s64_overflow(winstate->currentpos, offset, &target_pos)) + target_pos = -1; + else if (target_pos < winstate->nav_match_start) + target_pos = -1; /* before match_start */ + break; + + case RPR_NAV_PREV_FIRST: + case RPR_NAV_NEXT_FIRST: + { + int64 compound_offset; + int64 inner_pos; + + /* Inner: match_start + offset */ + if (pg_add_s64_overflow(winstate->nav_match_start, offset, &inner_pos)) + { + target_pos = -1; + break; + } + if (inner_pos > winstate->currentpos || inner_pos < 0) + { + target_pos = -1; + break; + } + + /* Outer offset */ + compound_offset = rpr_nav_get_compound_offset(op); + + /* Apply outer: PREV subtracts, NEXT adds */ + if (op->d.rpr_nav.kind == RPR_NAV_PREV_FIRST) + { + if (pg_sub_s64_overflow(inner_pos, compound_offset, &target_pos)) + target_pos = -1; + } + else + { + if (pg_add_s64_overflow(inner_pos, compound_offset, &target_pos)) + target_pos = -1; + } + } + break; + + case RPR_NAV_PREV_LAST: + case RPR_NAV_NEXT_LAST: + { + int64 compound_offset; + int64 inner_pos; + + /* Inner: currentpos - offset */ + if (pg_sub_s64_overflow(winstate->currentpos, offset, &inner_pos)) + { + target_pos = -1; + break; + } + if (inner_pos < winstate->nav_match_start) + { + target_pos = -1; + break; + } + + /* Outer offset */ + compound_offset = rpr_nav_get_compound_offset(op); + + /* Apply outer: PREV subtracts, NEXT adds */ + if (op->d.rpr_nav.kind == RPR_NAV_PREV_LAST) + { + if (pg_sub_s64_overflow(inner_pos, compound_offset, &target_pos)) + target_pos = -1; + } + else + { + if (pg_add_s64_overflow(inner_pos, compound_offset, &target_pos)) + target_pos = -1; + } + } + break; + default: + elog(ERROR, "unrecognized RPR navigation kind: %d", + op->d.rpr_nav.kind); + break; + } + + /* + * Slot swap elision: if target_pos is the current row, skip the + * tuplestore fetch and slot swap entirely. This benefits LAST(expr), + * PREV(expr, 0), NEXT(expr, 0), and similar cases. + * + * We must still set nav_saved_outertuple (done above) so that + * EEOP_RPR_NAV_RESTORE is a harmless no-op. + */ + if (target_pos == winstate->currentpos) + return; + + /* Fetch target row slot (returns nav_null_slot if out of range) */ + target_slot = ExecRPRNavGetSlot(winstate, target_pos); + + /* + * Update econtext to point to the target slot. Also decompress the new + * slot's attributes since FETCHSOME already ran for the original slot. + * The caller (interpreter or JIT) is responsible for updating any local + * slot cache (e.g. outerslot) from econtext after we return. + */ + slot_getallattrs(target_slot); + econtext->ecxt_outertuple = target_slot; +} + +/* + * Evaluate RPR navigation: restore slot to original row. + * + * Restores econtext->ecxt_outertuple from the saved slot in winstate. + * When slot swap was elided (target == currentpos), this is a harmless + * no-op since saved and current slots are identical. + * The caller is responsible for updating any local slot cache. + * + * For pass-by-reference result types, the result datum points into + * nav_slot's tuple memory. If a subsequent navigation in the same + * expression re-fetches nav_slot for a different position, the old + * tuple is freed, leaving a dangling pointer. We prevent this by + * copying pass-by-ref results into per-tuple memory, which survives + * until the next ResetExprContext. + */ +void +ExecEvalRPRNavRestore(ExprState *state, ExprEvalStep *op, + ExprContext *econtext) +{ + WindowAggState *winstate = op->d.rpr_nav.winstate; + + econtext->ecxt_outertuple = winstate->nav_saved_outertuple; + + /* Stabilize pass-by-ref result against nav_slot re-fetch */ + if (!op->d.rpr_nav.resulttypbyval && + !*op->resnull) + { + MemoryContext oldContext; + + oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); + *op->resvalue = datumCopy(*op->resvalue, + false, + op->d.rpr_nav.resulttyplen); + MemoryContextSwitchTo(oldContext); + } +} diff --git a/src/backend/executor/execRPR.c b/src/backend/executor/execRPR.c new file mode 100644 index 00000000000..242ae9c6dcf --- /dev/null +++ b/src/backend/executor/execRPR.c @@ -0,0 +1,1772 @@ +/*------------------------------------------------------------------------- + * + * execRPR.c + * NFA-based Row Pattern Recognition engine for window functions. + * + * This file implements the NFA execution engine for the ROWS BETWEEN + * PATTERN clause (SQL Standard Feature R020: Row Pattern Recognition in + * Window Functions). + * + * The engine executes the compiled RPRPattern structure directly, avoiding + * regex compilation overhead. It is called by nodeWindowAgg.c and exposes + * the interface declared in executor/execRPR.h. + * + * + * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/executor/execRPR.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "common/int.h" +#include "executor/execRPR.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "optimizer/rpr.h" +#include "utils/memutils.h" + +/* + * For the design and execution model of the NFA engine implemented + * in this file, see src/backend/executor/README.rpr. + */ + +/* Bitmap macros for NFA cycle detection (cf. bitmapset.c, tidbitmap.c) */ +#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD) +#define BITNUM(x) ((x) % BITS_PER_BITMAPWORD) + +/* Forward declarations - NFA state management */ +static RPRNFAState *nfa_state_alloc(WindowAggState *winstate); +static void nfa_state_free(WindowAggState *winstate, RPRNFAState *state); +static void nfa_state_free_list(WindowAggState *winstate, RPRNFAState *list); +static RPRNFAState *nfa_state_create(WindowAggState *winstate, int16 elemIdx, + int32 *counts, bool sourceAbsorbable); +static bool nfa_states_equal(WindowAggState *winstate, RPRNFAState *s1, + RPRNFAState *s2); +static bool nfa_add_state_unique(WindowAggState *winstate, RPRNFAContext *ctx, + RPRNFAState *state); +static void nfa_add_matched_state(WindowAggState *winstate, RPRNFAContext *ctx, + RPRNFAState *state, int64 matchEndRow); + +/* Forward declarations - NFA context management (internal) */ +static RPRNFAContext *nfa_context_alloc(WindowAggState *winstate); +static void nfa_unlink_context(WindowAggState *winstate, RPRNFAContext *ctx); + +/* Forward declarations - NFA statistics */ +static void nfa_update_length_stats(int64 count, NFALengthStats *stats, int64 newLen); +static void nfa_record_context_skipped(WindowAggState *winstate, int64 skippedLen); +static void nfa_record_context_absorbed(WindowAggState *winstate, int64 absorbedLen); + +/* Forward declarations - NFA absorption */ +static void nfa_update_absorption_flags(RPRNFAContext *ctx); +static bool nfa_states_covered(RPRPattern *pattern, RPRNFAContext *older, + RPRNFAContext *newer); +static void nfa_try_absorb_context(WindowAggState *winstate, RPRNFAContext *ctx); +static void nfa_absorb_contexts(WindowAggState *winstate); + +/* Forward declarations - NFA match and advance */ +static bool nfa_eval_var_match(WindowAggState *winstate, + RPRPatternElement *elem, bool *varMatched); +static void nfa_match(WindowAggState *winstate, RPRNFAContext *ctx, + bool *varMatched); +static void nfa_advance_state(WindowAggState *winstate, RPRNFAContext *ctx, + RPRNFAState *state, int64 currentPos); +static void nfa_route_to_elem(WindowAggState *winstate, RPRNFAContext *ctx, + RPRNFAState *state, RPRPatternElement *nextElem, + int64 currentPos); +static void nfa_advance_alt(WindowAggState *winstate, RPRNFAContext *ctx, + RPRNFAState *state, RPRPatternElement *elem, + int64 currentPos); +static void nfa_advance_begin(WindowAggState *winstate, RPRNFAContext *ctx, + RPRNFAState *state, RPRPatternElement *elem, + int64 currentPos); +static void nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, + RPRNFAState *state, RPRPatternElement *elem, + int64 currentPos); +static void nfa_advance_var(WindowAggState *winstate, RPRNFAContext *ctx, + RPRNFAState *state, RPRPatternElement *elem, + int64 currentPos); +static void nfa_advance(WindowAggState *winstate, RPRNFAContext *ctx, + int64 currentPos); + +/* + * NFA-based pattern matching implementation + * + * These functions implement direct NFA execution using the compiled + * RPRPattern structure, avoiding regex compilation overhead. + * + * Execution Flow: match -> absorb -> advance + * ----------------------------------------- + * The NFA execution follows a three-phase cycle for each row: + * + * 1. MATCH (convergence): Evaluate all waiting states against current row. + * States on VAR elements are checked against their defining conditions. + * Failed matches are removed, successful ones may transition forward. + * This is a "convergence" phase - the number of states tends to decrease. + * + * 2. ABSORB: After matching, check if any context can absorb another. + * Context absorption is an optimization that merges equivalent contexts. + * A context can only be absorbed if ALL its states are absorbable. + * + * 3. ADVANCE (divergence): Expand states through epsilon transitions. + * States advance through ALT (alternation), END (group end), and + * optional elements until reaching VAR or FIN elements where they wait. + * This is a "divergence" phase - ALT creates multiple branch states. + * + * Key Design Decisions: + * --------------------- + * - VAR->END transition in match phase: When a simple VAR (max=1) matches + * and the next element is END, we transition immediately in the match + * phase rather than waiting for advance. This is necessary for correct + * absorption: states must be at END to be marked absorbable before the + * absorption check occurs. + * + * - Optional VAR skip paths: When advance lands on a VAR with min=0, + * we create both a waiting state AND a skip state (like ALT branches). + * This ensures patterns like "A B? C" work correctly - we need a state + * waiting for B AND a state that has already skipped to C. + * + * - END->END count increment: When transitioning from one END to another + * END within advance, we must increment the outer END's count. This + * handles nested groups like "((A|B)+)+" correctly - exiting the inner + * group counts as one iteration of the outer group. + * + * - Empty match handling: The initial advance uses currentPos = + * startPos - 1 (before any row is consumed). If FIN is reached via + * epsilon transitions alone, matchEndRow = startPos - 1 < matchStartRow. + * If matchedState is set (FIN was reached), this is an empty match + * (RF_EMPTY_MATCH); otherwise it is unmatched (RF_UNMATCHED). + * For reluctant min=0 patterns (A*?, A??), the skip path reaches + * FIN first and early termination prunes enter paths, yielding an + * immediate empty match result. For greedy patterns (A*), the enter + * path adds VAR states first, then the skip FIN is recorded but VAR + * states survive for later matching. + * + * Context Absorption Runtime: + * --------------------------- + * Absorption uses flags computed at planning time (in rpr.c) and two + * context-level flags maintained at runtime: + * + * State-level: + * state.isAbsorbable: true if state is in the absorbable region. + * - Set at creation: elem->flags & RPR_ELEM_ABSORBABLE_BRANCH + * - At transition: prevAbsorbable && (newElem->flags & ABSORBABLE_BRANCH) + * - Monotonic: once false, stays false forever + * + * Context-level: + * ctx.hasAbsorbableState: can this context absorb others? + * - True if at least one state has isAbsorbable=true + * - Monotonic: true->false only (optimization: skip recalc when false) + * + * ctx.allStatesAbsorbable: can this context be absorbed? + * - True if ALL states have isAbsorbable=true + * - Dynamic: can change false->true (when non-absorbable states die) + * + * Absorption Algorithm: + * For each pair (older Ctx1, newer Ctx2): + * 1. Pre-check: Ctx1.hasAbsorbableState && Ctx2.allStatesAbsorbable + * -> If false, skip (fast filter) + * 2. Coverage check: For each Ctx2 state with isAbsorbable=true, + * find Ctx1 state with same elemIdx and count >= Ctx2.count + * 3. If all Ctx2 absorbable states are covered, absorb Ctx2 + * + * Example: Pattern A+ B + * Row 1: Ctx1 at A (count=1) + * Row 2: Ctx1 at A (count=2), Ctx2 at A (count=1) + * -> Both at same elemIdx (A), Ctx1.count >= Ctx2.count + * -> Ctx2 absorbed + * + * The asymmetric design (Ctx1 needs hasAbsorbable, Ctx2 needs allAbsorbable) + * allows absorption even when Ctx1 has extra non-absorbable states. + */ + +/* + * nfa_state_alloc + * + * Allocate an NFA state, reusing from freeList if available. + * freeList is stored in WindowAggState for reuse across match attempts. + * Uses flexible array member for counts[]. + */ +static RPRNFAState * +nfa_state_alloc(WindowAggState *winstate) +{ + RPRNFAState *state; + + /* Try to reuse from free list first */ + if (winstate->nfaStateFree != NULL) + { + state = winstate->nfaStateFree; + winstate->nfaStateFree = state->next; + } + else + { + /* Allocate in partition context for proper lifetime */ + state = MemoryContextAlloc(winstate->partcontext, winstate->nfaStateSize); + } + + /* Initialize entire state to zero */ + memset(state, 0, winstate->nfaStateSize); + + /* Update statistics */ + winstate->nfaStatesActive++; + winstate->nfaStatesTotalCreated++; + winstate->nfaStatesMax = Max(winstate->nfaStatesMax, + winstate->nfaStatesActive); + + return state; +} + +/* + * nfa_state_free + * + * Return a state to the free list for later reuse. + */ +static void +nfa_state_free(WindowAggState *winstate, RPRNFAState *state) +{ + winstate->nfaStatesActive--; + state->next = winstate->nfaStateFree; + winstate->nfaStateFree = state; +} + +/* + * nfa_state_free_list + * + * Return all states in a list to the free list. + */ +static void +nfa_state_free_list(WindowAggState *winstate, RPRNFAState *list) +{ + RPRNFAState *next; + + for (; list != NULL; list = next) + { + next = list->next; + nfa_state_free(winstate, list); + } +} + +/* + * nfa_state_create + * + * Create a new state with given elemIdx and counts. + * isAbsorbable is computed immediately: inherited AND new element's flag. + * Monotonic property: once false, stays false through all transitions. + * + * Caller is responsible for linking the returned state. + */ +static RPRNFAState * +nfa_state_create(WindowAggState *winstate, int16 elemIdx, + int32 *counts, bool sourceAbsorbable) +{ + RPRPattern *pattern = winstate->rpPattern; + int maxDepth = pattern->maxDepth; + RPRNFAState *state = nfa_state_alloc(winstate); + RPRPatternElement *elem = &pattern->elements[elemIdx]; + + state->elemIdx = elemIdx; + if (counts != NULL && maxDepth > 0) + memcpy(state->counts, counts, sizeof(int32) * maxDepth); + + /* + * Compute isAbsorbable immediately at transition time. isAbsorbable = + * sourceAbsorbable && (elem->flags & ABSORBABLE_BRANCH) Monotonic: once + * false, stays false (can't re-enter absorbable region). + */ + state->isAbsorbable = sourceAbsorbable && RPRElemIsAbsorbableBranch(elem); + + return state; +} + +/* + * nfa_states_equal + * + * Check if two states are equivalent (same elemIdx and counts). + */ +static bool +nfa_states_equal(WindowAggState *winstate, RPRNFAState *s1, RPRNFAState *s2) +{ + RPRPattern *pattern = winstate->rpPattern; + RPRPatternElement *elem; + int compareDepth; + + if (s1->elemIdx != s2->elemIdx) + return false; + + /* Compare counts up to current element's depth */ + elem = &pattern->elements[s1->elemIdx]; + compareDepth = elem->depth + 1; /* depth 0 needs 1 count, etc. */ + + if (memcmp(s1->counts, s2->counts, sizeof(int32) * compareDepth) != 0) + return false; + + return true; +} + +/* + * nfa_add_state_unique + * + * Add a state to ctx->states at the END, only if no duplicate exists. + * Returns true if state was added, false if duplicate found (state is freed). + * Earlier states have better lexical order (DFS traversal order), so existing wins. + */ +static bool +nfa_add_state_unique(WindowAggState *winstate, RPRNFAContext *ctx, RPRNFAState *state) +{ + RPRNFAState *s; + RPRNFAState *tail = NULL; + + /* Mark VAR in visited before duplicate check to prevent DFS loops */ + winstate->nfaVisitedElems[WORDNUM(state->elemIdx)] |= + ((bitmapword) 1 << BITNUM(state->elemIdx)); + + /* Check for duplicate and find tail */ + for (s = ctx->states; s != NULL; s = s->next) + { + CHECK_FOR_INTERRUPTS(); + + if (nfa_states_equal(winstate, s, state)) + { + /* + * Duplicate found - existing has better lexical order, discard + * new + */ + nfa_state_free(winstate, state); + winstate->nfaStatesMerged++; + return false; + } + tail = s; + } + + /* No duplicate, add at end */ + state->next = NULL; + if (tail == NULL) + ctx->states = state; + else + tail->next = state; + + return true; +} + +/* + * nfa_add_matched_state + * + * Record a state that reached FIN, replacing any previous match. + * + * For SKIP PAST LAST ROW, also prune subsequent contexts whose start row + * falls within the match range, as they cannot produce output rows. + */ +static void +nfa_add_matched_state(WindowAggState *winstate, RPRNFAContext *ctx, + RPRNFAState *state, int64 matchEndRow) +{ + if (ctx->matchedState != NULL) + nfa_state_free(winstate, ctx->matchedState); + + ctx->matchedState = state; + state->next = NULL; + ctx->matchEndRow = matchEndRow; + + /* Prune contexts that started within this match's range */ + if (winstate->rpSkipTo == ST_PAST_LAST_ROW) + { + int64 skippedLen; + + while (ctx->next != NULL && + ctx->next->matchStartRow <= matchEndRow) + { + RPRNFAContext *nextCtx = ctx->next; + + Assert(nextCtx->lastProcessedRow >= nextCtx->matchStartRow); + skippedLen = nextCtx->lastProcessedRow - nextCtx->matchStartRow + 1; + nfa_record_context_skipped(winstate, skippedLen); + + ExecRPRFreeContext(winstate, nextCtx); + } + } +} + +/* + * nfa_context_alloc + * + * Allocate an NFA context, reusing from free list if available. + */ +static RPRNFAContext * +nfa_context_alloc(WindowAggState *winstate) +{ + RPRNFAContext *ctx; + + if (winstate->nfaContextFree != NULL) + { + ctx = winstate->nfaContextFree; + winstate->nfaContextFree = ctx->next; + } + else + { + /* Allocate in partition context for proper lifetime */ + ctx = MemoryContextAlloc(winstate->partcontext, sizeof(RPRNFAContext)); + } + + ctx->next = NULL; + ctx->prev = NULL; + ctx->states = NULL; + ctx->matchStartRow = -1; + ctx->matchEndRow = -1; + ctx->lastProcessedRow = -1; + ctx->matchedState = NULL; + + /* Initialize two-flag absorption design based on pattern */ + ctx->hasAbsorbableState = winstate->rpPattern->isAbsorbable; + ctx->allStatesAbsorbable = winstate->rpPattern->isAbsorbable; + + /* Update statistics */ + winstate->nfaContextsActive++; + winstate->nfaContextsTotalCreated++; + winstate->nfaContextsMax = Max(winstate->nfaContextsMax, + winstate->nfaContextsActive); + + return ctx; +} + +/* + * nfa_unlink_context + * + * Remove a context from the doubly-linked active context list. + * Updates head (nfaContext) and tail (nfaContextTail) as needed. + */ +static void +nfa_unlink_context(WindowAggState *winstate, RPRNFAContext *ctx) +{ + if (ctx->prev != NULL) + ctx->prev->next = ctx->next; + else + winstate->nfaContext = ctx->next; /* was head */ + + if (ctx->next != NULL) + ctx->next->prev = ctx->prev; + else + winstate->nfaContextTail = ctx->prev; /* was tail */ + + ctx->next = NULL; + ctx->prev = NULL; +} + +/* + * nfa_update_length_stats + * + * Helper function to update min/max/total length statistics. + * Called when tracking match/mismatch/absorbed/skipped lengths. + */ +static void +nfa_update_length_stats(int64 count, NFALengthStats *stats, int64 newLen) +{ + if (count == 1) + { + stats->min = newLen; + stats->max = newLen; + } + else + { + stats->min = Min(stats->min, newLen); + stats->max = Max(stats->max, newLen); + } + stats->total += newLen; +} + +/* + * nfa_record_context_skipped + * + * Record a skipped context in statistics. + */ +static void +nfa_record_context_skipped(WindowAggState *winstate, int64 skippedLen) +{ + winstate->nfaContextsSkipped++; + nfa_update_length_stats(winstate->nfaContextsSkipped, + &winstate->nfaSkippedLen, + skippedLen); +} + +/* + * nfa_record_context_absorbed + * + * Record an absorbed context in statistics. + */ +static void +nfa_record_context_absorbed(WindowAggState *winstate, int64 absorbedLen) +{ + winstate->nfaContextsAbsorbed++; + nfa_update_length_stats(winstate->nfaContextsAbsorbed, + &winstate->nfaAbsorbedLen, + absorbedLen); +} + +/* + * nfa_update_absorption_flags + * + * Update context's absorption flags after state changes. + * + * Two flags control absorption behavior: + * hasAbsorbableState: true if context has at least one absorbable state. + * This flag is monotonic (true -> false only). Once all absorbable states + * die, no new absorbable states can be created through transitions. + * allStatesAbsorbable: true if ALL states in context are absorbable. + * This flag is dynamic and can change false -> true when non-absorbable + * states die off. + * + * Optimization: Once hasAbsorbableState becomes false, both flags remain false + * permanently, so we skip recalculation. + */ +static void +nfa_update_absorption_flags(RPRNFAContext *ctx) +{ + RPRNFAState *state; + bool hasAbsorbable = false; + bool allAbsorbable = true; + + /* + * Optimization: Once hasAbsorbableState becomes false, it stays false. No + * need to recalculate - both flags remain false permanently. + */ + if (!ctx->hasAbsorbableState) + { + ctx->allStatesAbsorbable = false; + return; + } + + /* No states means no absorbable states */ + if (ctx->states == NULL) + { + ctx->hasAbsorbableState = false; + ctx->allStatesAbsorbable = false; + return; + } + + /* + * Iterate through all states to check absorption status. Uses + * state->isAbsorbable which tracks if state is in absorbable region. This + * is different from RPRElemIsAbsorbable(elem) which checks judgment + * point. + */ + for (state = ctx->states; state != NULL; state = state->next) + { + CHECK_FOR_INTERRUPTS(); + + if (state->isAbsorbable) + hasAbsorbable = true; + else + allAbsorbable = false; + } + + ctx->hasAbsorbableState = hasAbsorbable; + ctx->allStatesAbsorbable = allAbsorbable; +} + +/* + * nfa_states_covered + * + * Check if all states in newer context are "covered" by older context. + * + * A newer state is covered when older context has an absorbable state at the + * same pattern element (elemIdx) with count >= newer's count at that depth. + * The covering state must be absorbable because only absorbable states can + * guarantee to produce superset matches. + * + * If all newer states are covered, newer context's eventual matches will be + * a subset of older context's matches, making newer redundant. + */ +static bool +nfa_states_covered(RPRPattern *pattern, RPRNFAContext *older, RPRNFAContext *newer) +{ + RPRNFAState *newerState; + + for (newerState = newer->states; newerState != NULL; newerState = newerState->next) + { + RPRNFAState *olderState; + RPRPatternElement *elem; + int depth; + bool found = false; + + /* All states are absorbable (caller checks allStatesAbsorbable) */ + elem = &pattern->elements[newerState->elemIdx]; + depth = elem->depth; + + /* + * Only compare at absorption judgment points (RPR_ELEM_ABSORBABLE). + * Judgment points are where count-dominance guarantees the newer + * context's future matches are a subset of the older's. + */ + if (!RPRElemIsAbsorbable(elem)) + return false; + + for (olderState = older->states; olderState != NULL; olderState = olderState->next) + { + CHECK_FOR_INTERRUPTS(); + + /* Covering state must also be absorbable */ + if (olderState->isAbsorbable && + olderState->elemIdx == newerState->elemIdx && + olderState->counts[depth] >= newerState->counts[depth]) + { + found = true; + break; + } + } + + if (!found) + return false; + } + + return true; +} + +/* + * nfa_try_absorb_context + * + * Try to absorb ctx (newer) into an older in-progress context. + * Returns true if ctx was absorbed and freed. + * + * Absorption requires three conditions: + * 1. ctx must have all states absorbable (allStatesAbsorbable). + * If ctx has any non-absorbable state, it may produce unique matches. + * 2. older must have at least one absorbable state (hasAbsorbableState). + * Without absorbable states, older cannot cover newer's states. + * 3. All ctx states must be covered by older's absorbable states. + * This ensures older will produce all matches that ctx would produce. + * + * Context list is ordered by creation time (oldest first via prev chain). + * Each row creates at most one context, so earlier contexts have smaller + * matchStartRow values. + */ +static void +nfa_try_absorb_context(WindowAggState *winstate, RPRNFAContext *ctx) +{ + RPRPattern *pattern = winstate->rpPattern; + RPRNFAContext *older; + + /* Early exit: ctx must have all states absorbable */ + if (!ctx->allStatesAbsorbable) + return; + + for (older = ctx->prev; older != NULL; older = older->prev) + { + CHECK_FOR_INTERRUPTS(); + + /* + * By invariant: ctx->prev chain is in creation order (oldest first), + * and each row creates at most one context. So all contexts in this + * chain have matchStartRow < ctx->matchStartRow. + */ + + /* Older must also be in-progress */ + if (older->states == NULL) + continue; + + /* Older must have at least one absorbable state */ + if (!older->hasAbsorbableState) + continue; + + /* Check if all newer states are covered by older */ + if (nfa_states_covered(pattern, older, ctx)) + { + int64 absorbedLen = ctx->lastProcessedRow - ctx->matchStartRow + 1; + + ExecRPRFreeContext(winstate, ctx); + nfa_record_context_absorbed(winstate, absorbedLen); + return; + } + } +} + +/* + * nfa_absorb_contexts + * + * Absorb redundant contexts to reduce memory usage and computation. + * + * For patterns like A+, newer contexts starting later will produce subset + * matches of older contexts with higher counts. By absorbing these redundant + * contexts early, we avoid duplicate work. + * + * Iterates from tail (newest) toward head (oldest) via prev chain. + * Only in-progress contexts (states != NULL) are candidates for absorption; + * completed contexts represent valid match results. + */ +static void +nfa_absorb_contexts(WindowAggState *winstate) +{ + RPRNFAContext *ctx; + RPRNFAContext *nextCtx; + + for (ctx = winstate->nfaContextTail; ctx != NULL; ctx = nextCtx) + { + nextCtx = ctx->prev; + + /* + * Only absorb in-progress contexts; completed contexts are valid + * results + */ + if (ctx->states != NULL) + nfa_try_absorb_context(winstate, ctx); + } +} + +/* + * nfa_eval_var_match + * + * Evaluate if a VAR element matches the current row. + * + * varMatched is a pre-evaluated boolean array indexed by varId, computed + * once per row by evaluating all DEFINE expressions. NULL means no DEFINE + * clauses exist (only possible during early development/testing). + * + * Per SQL:2016 R020, pattern variables not listed in DEFINE are implicitly + * TRUE -- they match every row. This is checked via varId >= list_length. + */ +static bool +nfa_eval_var_match(WindowAggState *winstate, RPRPatternElement *elem, + bool *varMatched) +{ + /* This function should only be called for VAR elements */ + Assert(RPRElemIsVar(elem)); + + if (varMatched == NULL) + return false; + if (elem->varId >= list_length(winstate->defineVariableList)) + return true; + return varMatched[elem->varId]; +} + +/* + * nfa_match + * + * Match phase (convergence): evaluate VAR elements against current row. + * Only updates counts and removes dead states. Minimal transitions. + * + * For VAR elements: + * - matched: count++, keep state (unless count > max) + * - not matched: remove state (exit alternatives already exist from + * previous advance when count >= min was satisfied) + * + * For VARs that reached max count followed by END: + * - Advance through END chain to reach absorption judgment point + * - Only deterministic exits (count >= max, max != INF) are handled + * - Chains through END elements while count >= max (must-exit path) + * + * Non-VAR elements (ALT, END, FIN) are kept as-is for advance phase. + */ +static void +nfa_match(WindowAggState *winstate, RPRNFAContext *ctx, bool *varMatched) +{ + RPRPattern *pattern = winstate->rpPattern; + RPRPatternElement *elements = pattern->elements; + RPRNFAState **prevPtr = &ctx->states; + RPRNFAState *state; + RPRNFAState *nextState; + + /* + * Evaluate VAR elements against current row. For VARs that reach max + * count with END next, advance through END chain inline so absorb phase + * can compare states at judgment points. + */ + for (state = ctx->states; state != NULL; state = nextState) + { + RPRPatternElement *elem = &elements[state->elemIdx]; + + CHECK_FOR_INTERRUPTS(); + + nextState = state->next; + + if (RPRElemIsVar(elem)) + { + bool matched; + int depth = elem->depth; + int32 count = state->counts[depth]; + + matched = nfa_eval_var_match(winstate, elem, varMatched); + + if (matched) + { + /* Increment count */ + if (count < RPR_COUNT_MAX) + count++; + + /* Max constraint should not be exceeded */ + Assert(elem->max == RPR_QUANTITY_INF || count <= elem->max); + + state->counts[depth] = count; + + /* + * For VAR at max count with END next, advance through END + * chain to reach the absorption judgment point. Only + * deterministic exits (count >= max, max finite) are handled; + * unbounded VARs stay for advance phase. + * + * In nested patterns like ((A B){2}){3}, a VAR reaching its + * max triggers an exit cascade: inner END increments inner + * group count, which may itself reach max, requiring an exit + * to the next outer END. The loop below walks this chain. + * + * ABSORBABLE_BRANCH marks elements inside the absorbable + * region; ABSORBABLE marks the outermost judgment point where + * count-dominance is evaluated. We chain through BRANCH + * elements until reaching the ABSORBABLE point or an element + * that can still loop (count < max). + */ + if (RPRElemIsAbsorbableBranch(elem) && + !RPRElemIsAbsorbable(elem) && + count >= elem->max && + RPRElemIsEnd(&elements[elem->next])) + { + RPRPatternElement *endElem = &elements[elem->next]; + int endDepth = endElem->depth; + int32 endCount = state->counts[endDepth]; + + /* Increment group count */ + if (endCount < RPR_COUNT_MAX) + endCount++; + Assert(endElem->max == RPR_QUANTITY_INF || + endCount <= endElem->max); + + state->elemIdx = elem->next; + state->counts[endDepth] = endCount; + + /* + * Chain through END elements within the absorbable region + * (ABSORBABLE_BRANCH) until reaching the judgment point + * (ABSORBABLE). Continue only on must-exit path (count + * >= max) with END next. + */ + while (RPRElemIsAbsorbableBranch(endElem) && + !RPRElemIsAbsorbable(endElem) && + endCount >= endElem->max && + RPRElemIsEnd(&elements[endElem->next])) + { + RPRPatternElement *outerEnd = &elements[endElem->next]; + int outerDepth = outerEnd->depth; + int32 outerCount = state->counts[outerDepth]; + + /* Reset exited group's count */ + state->counts[endDepth] = 0; + + /* Increment outer group count */ + if (outerCount < RPR_COUNT_MAX) + outerCount++; + Assert(outerEnd->max == RPR_QUANTITY_INF || + outerCount <= outerEnd->max); + + state->elemIdx = endElem->next; + state->counts[outerDepth] = outerCount; + + /* Advance to next END in chain */ + endElem = outerEnd; + endDepth = outerDepth; + endCount = outerCount; + } + } + /* else: stay at VAR for advance phase */ + } + else + { + /* + * Not matched - remove state. Exit alternatives were already + * created by advance phase when count >= min was satisfied. + */ + *prevPtr = nextState; + nfa_state_free(winstate, state); + continue; + } + } + /* Non-VAR elements: keep as-is for advance phase */ + + prevPtr = &state->next; + } +} + +/* + * nfa_route_to_elem + * + * Route state to next element. If VAR, add to ctx->states and process + * skip path if optional. Otherwise, continue epsilon expansion via recursion. + */ +static void +nfa_route_to_elem(WindowAggState *winstate, RPRNFAContext *ctx, + RPRNFAState *state, RPRPatternElement *nextElem, + int64 currentPos) +{ + if (RPRElemIsVar(nextElem)) + { + RPRNFAState *skipState = NULL; + + /* Create skip state before add_unique, which may free state */ + if (RPRElemCanSkip(nextElem)) + skipState = nfa_state_create(winstate, nextElem->next, + state->counts, state->isAbsorbable); + + nfa_add_state_unique(winstate, ctx, state); + + if (skipState != NULL) + nfa_advance_state(winstate, ctx, skipState, currentPos); + } + else + { + nfa_advance_state(winstate, ctx, state, currentPos); + } +} + +/* + * nfa_advance_alt + * + * Handle ALT element: expand all branches in lexical order via DFS. + */ +static void +nfa_advance_alt(WindowAggState *winstate, RPRNFAContext *ctx, + RPRNFAState *state, RPRPatternElement *elem, + int64 currentPos) +{ + RPRPattern *pattern = winstate->rpPattern; + RPRPatternElement *elements = pattern->elements; + RPRElemIdx altIdx = elem->next; + + while (altIdx >= 0 && altIdx < pattern->numElements) + { + RPRPatternElement *altElem = &elements[altIdx]; + RPRNFAState *newState; + + /* Stop if element is outside ALT scope (not a branch) */ + if (altElem->depth <= elem->depth) + break; + + /* Create independent state for each branch */ + newState = nfa_state_create(winstate, altIdx, + state->counts, state->isAbsorbable); + + /* Recursively process this branch before next */ + nfa_advance_state(winstate, ctx, newState, currentPos); + altIdx = altElem->jump; + } + + nfa_state_free(winstate, state); +} + +/* + * nfa_advance_begin + * + * Handle BEGIN element: group entry logic. + * BEGIN is only visited at initial group entry (count is always 0). + * If min=0, creates a skip path past the group. + * Loop-back from END goes directly to first child, bypassing BEGIN. + */ +static void +nfa_advance_begin(WindowAggState *winstate, RPRNFAContext *ctx, + RPRNFAState *state, RPRPatternElement *elem, + int64 currentPos) +{ + RPRPattern *pattern = winstate->rpPattern; + RPRPatternElement *elements = pattern->elements; + RPRNFAState *skipState = NULL; + + state->counts[elem->depth] = 0; + + /* Optional group: create skip path (but don't route yet) */ + if (elem->min == 0) + { + skipState = nfa_state_create(winstate, elem->jump, + state->counts, state->isAbsorbable); + } + + if (skipState != NULL && RPRElemIsReluctant(elem)) + { + RPRNFAState *savedMatch = ctx->matchedState; + + /* Reluctant: skip first (prefer fewer iterations), enter second */ + nfa_route_to_elem(winstate, ctx, skipState, + &elements[elem->jump], currentPos); + + /* + * If skip path reached FIN, shortest match is found. Skip group entry + * to prevent longer matches. + */ + if (ctx->matchedState != savedMatch) + { + nfa_state_free(winstate, state); + return; + } + + state->elemIdx = elem->next; + nfa_route_to_elem(winstate, ctx, state, + &elements[state->elemIdx], currentPos); + } + else + { + /* Greedy: enter first, skip second */ + state->elemIdx = elem->next; + nfa_route_to_elem(winstate, ctx, state, + &elements[state->elemIdx], currentPos); + + if (skipState != NULL) + { + nfa_route_to_elem(winstate, ctx, skipState, + &elements[elem->jump], currentPos); + } + } +} + +/* + * nfa_advance_end + * + * Handle END element: group repetition logic. + * Decides whether to loop back or exit based on count vs min/max. + */ +static void +nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, + RPRNFAState *state, RPRPatternElement *elem, + int64 currentPos) +{ + RPRPattern *pattern = winstate->rpPattern; + RPRPatternElement *elements = pattern->elements; + int depth = elem->depth; + int32 count = state->counts[depth]; + + if (count < elem->min) + { + RPRPatternElement *jumpElem; + RPRNFAState *ffState = NULL; + + /*---------- + * Two paths are explored in parallel when the group body is nullable + * (RPR_ELEM_EMPTY_LOOP): + * + * 1. Primary path: loop back and attempt real matches in the + * next iteration (state, modified below). + * + * 2. Fast-forward path: skip directly to after the group, + * treating all remaining required iterations as empty + * matches (ffState, handled after the primary path). + * + * The snapshot must be taken BEFORE modifying state for the loop-back, + * since both paths diverge from the same point. + *---------- + */ + if (RPRElemCanEmptyLoop(elem)) + ffState = nfa_state_create(winstate, state->elemIdx, + state->counts, state->isAbsorbable); + + /* Primary path: loop back for real matches */ + for (int d = depth + 1; d < pattern->maxDepth; d++) + state->counts[d] = 0; + state->elemIdx = elem->jump; + jumpElem = &elements[state->elemIdx]; + nfa_route_to_elem(winstate, ctx, state, jumpElem, + currentPos); + + /* + * Fast-forward path for nullable bodies. E.g. (A?){2,3} when A + * doesn't match: the primary loop-back produces empty iterations that + * cycle detection would kill. Instead, exit directly with count + * satisfied. Route to elem->next (not nfa_advance_end) to avoid + * creating competing greedy/reluctant loop states. + */ + if (ffState != NULL) + { + RPRPatternElement *nextElem; + + ffState->counts[depth] = 0; + ffState->elemIdx = elem->next; + nextElem = &elements[ffState->elemIdx]; + + /* END->END: increment outer END's count */ + if (RPRElemIsEnd(nextElem) && + ffState->counts[nextElem->depth] < RPR_COUNT_MAX) + ffState->counts[nextElem->depth]++; + + nfa_route_to_elem(winstate, ctx, ffState, nextElem, + currentPos); + } + } + else if (elem->max != RPR_QUANTITY_INF && count >= elem->max) + { + /* Must exit: reached max iterations. */ + RPRPatternElement *nextElem; + + state->counts[depth] = 0; + state->elemIdx = elem->next; + nextElem = &elements[state->elemIdx]; + + /* Update isAbsorbable for target element (monotonic) */ + state->isAbsorbable = state->isAbsorbable && + RPRElemIsAbsorbableBranch(nextElem); + + /* END->END: increment outer END's count */ + if (RPRElemIsEnd(nextElem) && state->counts[nextElem->depth] < RPR_COUNT_MAX) + state->counts[nextElem->depth]++; + + nfa_route_to_elem(winstate, ctx, state, nextElem, currentPos); + } + else + { + /* + * Between min and max (with at least one iteration) - can exit or + * loop. Greedy: loop first (prefer more iterations). Reluctant: exit + * first (prefer fewer iterations). + */ + RPRNFAState *exitState; + RPRPatternElement *jumpElem; + RPRPatternElement *nextElem; + + /* + * Create exit state first (need original counts before modifying + * state) + */ + exitState = nfa_state_create(winstate, elem->next, + state->counts, state->isAbsorbable); + exitState->counts[depth] = 0; + nextElem = &elements[exitState->elemIdx]; + + /* END->END: increment outer END's count */ + if (RPRElemIsEnd(nextElem) && exitState->counts[nextElem->depth] < RPR_COUNT_MAX) + exitState->counts[nextElem->depth]++; + + /* Prepare loop state */ + for (int d = depth + 1; d < pattern->maxDepth; d++) + state->counts[d] = 0; + state->elemIdx = elem->jump; + jumpElem = &elements[state->elemIdx]; + + if (RPRElemIsReluctant(elem)) + { + RPRNFAState *savedMatch = ctx->matchedState; + + /* Exit first (preferred for reluctant) */ + nfa_route_to_elem(winstate, ctx, exitState, nextElem, + currentPos); + + /* + * If exit path reached FIN, shortest match is found. Skip loop to + * prevent longer matches from replacing it. + */ + if (ctx->matchedState != savedMatch) + { + nfa_state_free(winstate, state); + return; + } + + /* Loop second */ + nfa_route_to_elem(winstate, ctx, state, jumpElem, + currentPos); + } + else + { + /* Loop first (preferred for greedy) */ + nfa_route_to_elem(winstate, ctx, state, jumpElem, + currentPos); + /* Exit second */ + nfa_route_to_elem(winstate, ctx, exitState, nextElem, + currentPos); + } + } +} + +/* + * nfa_advance_var + * + * Handle VAR element: loop/exit transitions. + * After match phase, all VAR states have matched - decide next action. + */ +static void +nfa_advance_var(WindowAggState *winstate, RPRNFAContext *ctx, + RPRNFAState *state, RPRPatternElement *elem, + int64 currentPos) +{ + RPRPattern *pattern = winstate->rpPattern; + RPRPatternElement *elements = pattern->elements; + int depth = elem->depth; + int32 count = state->counts[depth]; + bool canLoop = (elem->max == RPR_QUANTITY_INF || count < elem->max); + bool canExit = (count >= elem->min); + + /* After a successful match, count >= 1, so at least one must be true */ + Assert(canLoop || canExit); + + if (canLoop && canExit) + { + /* + * Both loop and exit possible. Greedy: loop first (prefer longer + * match). Reluctant: exit first (prefer shorter match). + */ + RPRNFAState *cloneState; + RPRPatternElement *nextElem; + bool reluctant = RPRElemIsReluctant(elem); + + /* + * Clone state for the first-priority path. For greedy, clone is the + * loop state; for reluctant, clone is the exit state. + */ + if (reluctant) + { + RPRNFAState *savedMatch = ctx->matchedState; + + /* Clone for exit, original stays for loop */ + cloneState = nfa_state_create(winstate, elem->next, + state->counts, state->isAbsorbable); + cloneState->counts[depth] = 0; + nextElem = &elements[cloneState->elemIdx]; + + /* When exiting directly to an outer END, increment its count */ + if (RPRElemIsEnd(nextElem)) + { + if (cloneState->counts[nextElem->depth] < RPR_COUNT_MAX) + cloneState->counts[nextElem->depth]++; + } + + /* Exit first (preferred for reluctant) */ + nfa_route_to_elem(winstate, ctx, cloneState, nextElem, + currentPos); + + /* + * If exit path reached FIN, the shortest match is found. Skip + * loop state to prevent longer matches from replacing it. + */ + if (ctx->matchedState != savedMatch) + { + nfa_state_free(winstate, state); + return; + } + + /* Loop second */ + nfa_add_state_unique(winstate, ctx, state); + } + else + { + /* Clone for loop, original used for exit */ + cloneState = nfa_state_create(winstate, state->elemIdx, + state->counts, state->isAbsorbable); + + /* Loop first (preferred for greedy) */ + nfa_add_state_unique(winstate, ctx, cloneState); + + /* Exit second */ + state->counts[depth] = 0; + state->elemIdx = elem->next; + nextElem = &elements[state->elemIdx]; + + /* + * Update isAbsorbable for target element (monotonic: AND + * preserves false) + */ + state->isAbsorbable = state->isAbsorbable && + RPRElemIsAbsorbableBranch(nextElem); + + /* + * When exiting directly to an outer END, increment its iteration + * count. Simple VARs (min=max=1) handle this via inline advance + * in nfa_match, but quantified VARs bypass that path. + */ + if (RPRElemIsEnd(nextElem)) + { + if (state->counts[nextElem->depth] < RPR_COUNT_MAX) + state->counts[nextElem->depth]++; + } + + nfa_route_to_elem(winstate, ctx, state, nextElem, + currentPos); + } + } + else if (canLoop) + { + /* Loop only: keep state as-is */ + nfa_add_state_unique(winstate, ctx, state); + } + else if (canExit) + { + /* Exit only: advance to next element */ + RPRPatternElement *nextElem; + + state->counts[depth] = 0; + state->elemIdx = elem->next; + nextElem = &elements[state->elemIdx]; + + /* + * Update isAbsorbable for target element (monotonic: AND preserves + * false) + */ + state->isAbsorbable = state->isAbsorbable && + RPRElemIsAbsorbableBranch(nextElem); + + /* See comment above: increment outer END count for quantified VARs */ + if (RPRElemIsEnd(nextElem)) + { + if (state->counts[nextElem->depth] < RPR_COUNT_MAX) + state->counts[nextElem->depth]++; + } + + nfa_route_to_elem(winstate, ctx, state, nextElem, currentPos); + } +} + +/* + * nfa_advance_state + * + * Recursively process a single state through epsilon transitions. + * DFS traversal ensures states are added to ctx->states in lexical order. + */ +static void +nfa_advance_state(WindowAggState *winstate, RPRNFAContext *ctx, + RPRNFAState *state, int64 currentPos) +{ + RPRPattern *pattern = winstate->rpPattern; + RPRPatternElement *elem; + + Assert(state->elemIdx >= 0 && state->elemIdx < pattern->numElements); + + /* Protect against stack overflow for deeply complex patterns */ + check_stack_depth(); + + /* Cycle detection: if this elemIdx was already visited in this DFS, bail */ + if (winstate->nfaVisitedElems[WORDNUM(state->elemIdx)] & + ((bitmapword) 1 << BITNUM(state->elemIdx))) + { + nfa_state_free(winstate, state); + return; + } + + elem = &pattern->elements[state->elemIdx]; + + /* + * Mark epsilon elements (END, ALT, BEGIN, FIN) in visited to prevent + * infinite epsilon cycles. VAR elements are marked later when added to + * the state list (nfa_add_state_unique), allowing legitimate loop-back to + * the same VAR in a new iteration. + */ + if (!RPRElemIsVar(elem)) + winstate->nfaVisitedElems[WORDNUM(state->elemIdx)] |= + ((bitmapword) 1 << BITNUM(state->elemIdx)); + + switch (elem->varId) + { + case RPR_VARID_FIN: + /* FIN: record match */ + nfa_add_matched_state(winstate, ctx, state, currentPos); + break; + + case RPR_VARID_ALT: + nfa_advance_alt(winstate, ctx, state, elem, currentPos); + break; + + case RPR_VARID_BEGIN: + nfa_advance_begin(winstate, ctx, state, elem, currentPos); + break; + + case RPR_VARID_END: + nfa_advance_end(winstate, ctx, state, elem, currentPos); + break; + + default: + /* VAR element */ + nfa_advance_var(winstate, ctx, state, elem, currentPos); + break; + } +} + +/* + * nfa_advance + * + * Advance phase (divergence): transition from all surviving states. + * Called after match phase with matched VAR states, or at context creation + * for initial epsilon expansion (with currentPos = startPos - 1). + * + * Processes states in order, using recursive DFS to maintain lexical order. + */ +static void +nfa_advance(WindowAggState *winstate, RPRNFAContext *ctx, int64 currentPos) +{ + RPRNFAState *states = ctx->states; + RPRNFAState *state; + RPRNFAState *savedMatchedState; + + ctx->states = NULL; /* Will rebuild */ + + /* Process each state in lexical order (DFS order from previous advance) */ + while (states != NULL) + { + CHECK_FOR_INTERRUPTS(); + savedMatchedState = ctx->matchedState; + + /* Clear visited bitmap before each state's DFS expansion */ + memset(winstate->nfaVisitedElems, 0, + sizeof(bitmapword) * winstate->nfaVisitedNWords); + + state = states; + states = states->next; + state->next = NULL; + + nfa_advance_state(winstate, ctx, state, currentPos); + + /* + * Early termination: if a FIN was newly reached in this advance, + * remaining old states have worse lexical order and can be pruned. + * Only check for new FIN arrivals (not ones from previous rows). + */ + if (ctx->matchedState != savedMatchedState && states != NULL) + { + nfa_state_free_list(winstate, states); + break; + } + } +} + + +/*********************************************************************** + * API exposed to nodeWindowAgg.c + ***********************************************************************/ + +/* + * ExecRPRStartContext + * + * Start a new match context at given position. + * Initializes context, state absorption flags, and performs initial advance + * to expand epsilon transitions (ALT branches, optional elements). + * Adds context to the tail of winstate->nfaContext list. + */ +RPRNFAContext * +ExecRPRStartContext(WindowAggState *winstate, int64 startPos) +{ + RPRNFAContext *ctx; + RPRPattern *pattern = winstate->rpPattern; + RPRPatternElement *elem; + + ctx = nfa_context_alloc(winstate); + ctx->matchStartRow = startPos; + ctx->states = nfa_state_alloc(winstate); /* initial state at elem 0 */ + + elem = &pattern->elements[0]; + + if (RPRElemIsAbsorbableBranch(elem)) + { + ctx->states->isAbsorbable = true; + } + else + { + ctx->hasAbsorbableState = false; + ctx->allStatesAbsorbable = false; + ctx->states->isAbsorbable = false; + } + + /* Add to tail of active context list (doubly-linked, oldest-first) */ + ctx->prev = winstate->nfaContextTail; + ctx->next = NULL; + if (winstate->nfaContextTail != NULL) + winstate->nfaContextTail->next = ctx; + else + winstate->nfaContext = ctx; /* first context becomes head */ + winstate->nfaContextTail = ctx; + + /* + * Initial advance (divergence): expand ALT branches and create exit + * states for VAR elements with min=0. This prepares the context for the + * first row's match phase. + * + * Use startPos - 1 as currentPos since no row has been consumed yet. If + * FIN is reached via epsilon transitions, matchEndRow = startPos - 1 + * which is less than matchStartRow, resulting in UNMATCHED treatment. + */ + nfa_advance(winstate, ctx, startPos - 1); + + return ctx; +} + +/* + * ExecRPRGetHeadContext + * + * Return the head context if its start position matches pos. + * Returns NULL if no context exists or head doesn't match pos. + */ +RPRNFAContext * +ExecRPRGetHeadContext(WindowAggState *winstate, int64 pos) +{ + RPRNFAContext *ctx = winstate->nfaContext; + + /* + * Contexts are sorted by matchStartRow ascending. If the head context + * doesn't match pos, no context exists for this position. + */ + if (ctx == NULL || ctx->matchStartRow != pos) + return NULL; + + return ctx; +} + +/* + * ExecRPRFreeContext + * + * Unlink context from active list and return it to free list. + * Also frees any states in the context. + */ +void +ExecRPRFreeContext(WindowAggState *winstate, RPRNFAContext *ctx) +{ + /* Unlink from active list first */ + nfa_unlink_context(winstate, ctx); + + /* Update statistics */ + winstate->nfaContextsActive--; + + if (ctx->states != NULL) + nfa_state_free_list(winstate, ctx->states); + if (ctx->matchedState != NULL) + nfa_state_free(winstate, ctx->matchedState); + + ctx->states = NULL; + ctx->matchedState = NULL; + ctx->next = winstate->nfaContextFree; + winstate->nfaContextFree = ctx; +} + +/* + * ExecRPRRecordContextSuccess + * + * Record a successful context in statistics. + */ +void +ExecRPRRecordContextSuccess(WindowAggState *winstate, int64 matchLen) +{ + winstate->nfaMatchesSucceeded++; + nfa_update_length_stats(winstate->nfaMatchesSucceeded, + &winstate->nfaMatchLen, + matchLen); +} + +/* + * ExecRPRRecordContextFailure + * + * Record a failed context in statistics. + * If failedLen == 1, count as pruned (failed on first row). + * If failedLen > 1, count as mismatched and update length stats. + */ +void +ExecRPRRecordContextFailure(WindowAggState *winstate, int64 failedLen) +{ + if (failedLen == 1) + { + winstate->nfaContextsPruned++; + } + else + { + winstate->nfaMatchesFailed++; + nfa_update_length_stats(winstate->nfaMatchesFailed, + &winstate->nfaFailLen, + failedLen); + } +} + +/* + * nfa_reevaluate_dependent_vars + * Re-evaluate match_start-dependent DEFINE variables for a specific + * context whose matchStartRow differs from the shared evaluation's + * nav_match_start. + * + * Only variables in defineMatchStartDependent are re-evaluated. The + * current row's slot (ecxt_outertuple) must already be set up by + * nfa_evaluate_row(). + */ +static void +nfa_reevaluate_dependent_vars(WindowAggState *winstate, RPRNFAContext *ctx, + int64 currentPos) +{ + ExprContext *econtext = winstate->ss.ps.ps_ExprContext; + int64 saved_match_start = winstate->nav_match_start; + int64 saved_pos = winstate->currentpos; + int varIdx = 0; + ListCell *lc; + + /* Temporarily set nav_match_start and currentpos for FIRST/LAST */ + winstate->nav_match_start = ctx->matchStartRow; + winstate->currentpos = currentPos; + + /* Invalidate nav_slot cache since match_start changed */ + winstate->nav_slot_pos = -1; + + foreach(lc, winstate->defineClauseList) + { + if (bms_is_member(varIdx, winstate->defineMatchStartDependent)) + { + ExprState *exprState = (ExprState *) lfirst(lc); + Datum result; + bool isnull; + + result = ExecEvalExpr(exprState, econtext, &isnull); + winstate->nfaVarMatched[varIdx] = (!isnull && DatumGetBool(result)); + } + + varIdx++; + if (varIdx >= list_length(winstate->defineVariableList)) + break; + } + + /* Restore original match_start, currentpos, and invalidate cache */ + winstate->nav_match_start = saved_match_start; + winstate->currentpos = saved_pos; + winstate->nav_slot_pos = -1; +} + +/* + * ExecRPRProcessRow + * + * Process all contexts for one row: + * 1. Match all contexts (convergence) - evaluate VARs, prune dead states + * 2. Absorb redundant contexts - ideal timing after convergence + * 3. Advance all contexts (divergence) - create new states for next row + */ +void +ExecRPRProcessRow(WindowAggState *winstate, int64 currentPos, + bool hasLimitedFrame, int64 frameOffset) +{ + RPRNFAContext *ctx; + bool *varMatched = winstate->nfaVarMatched; + bool hasDependent = !bms_is_empty(winstate->defineMatchStartDependent); + + /* Allow query cancellation once per row for simple/low-state patterns */ + CHECK_FOR_INTERRUPTS(); + + /* + * Phase 1: Match all contexts (convergence). Evaluate VAR elements, + * update counts, remove dead states. + */ + for (ctx = winstate->nfaContext; ctx != NULL; ctx = ctx->next) + { + if (ctx->states == NULL) + continue; + + /* Check frame boundary - finalize if exceeded */ + if (hasLimitedFrame) + { + int64 ctxFrameEnd; + + /* Clamp to INT64_MAX on overflow */ + if (pg_add_s64_overflow(ctx->matchStartRow, frameOffset + 1, + &ctxFrameEnd)) + ctxFrameEnd = PG_INT64_MAX; + + if (currentPos >= ctxFrameEnd) + { + /* Frame boundary exceeded: force mismatch */ + nfa_match(winstate, ctx, NULL); + continue; + } + } + + /* + * If this context has a different matchStartRow than the one used in + * the shared evaluation, re-evaluate match_start-dependent variables + * with this context's matchStartRow. + */ + if (hasDependent && ctx->matchStartRow != winstate->nav_match_start) + nfa_reevaluate_dependent_vars(winstate, ctx, currentPos); + nfa_match(winstate, ctx, varMatched); + ctx->lastProcessedRow = currentPos; + } + + /* + * Phase 2: Absorb redundant contexts. After match phase, states have + * converged - ideal for absorption. First update absorption flags that + * may have changed due to state removal. + */ + if (winstate->rpPattern->isAbsorbable) + { + for (ctx = winstate->nfaContext; ctx != NULL; ctx = ctx->next) + nfa_update_absorption_flags(ctx); + + nfa_absorb_contexts(winstate); + } + + /* + * Phase 3: Advance all contexts (divergence). Create new states + * (loop/exit) from surviving matched states. + */ + for (ctx = winstate->nfaContext; ctx != NULL; ctx = ctx->next) + { + if (ctx->states == NULL) + continue; + + /* + * Phase 1 already handled frame boundary exceeded contexts by forcing + * mismatch (nfa_match with NULL), which removes all states (all + * states are at VAR positions after advance). So any surviving + * context here must be within its frame boundary. + */ + Assert(!hasLimitedFrame || + ctx->matchStartRow > PG_INT64_MAX - frameOffset - 1 || + currentPos < ctx->matchStartRow + frameOffset + 1); + + nfa_advance(winstate, ctx, currentPos); + } +} + +/* + * ExecRPRCleanupDeadContexts + * + * Remove contexts that have failed (no active states and no match). + * These are contexts that failed during normal processing and should be + * counted as pruned (if length 1) or mismatched (if length > 1). + */ +void +ExecRPRCleanupDeadContexts(WindowAggState *winstate, RPRNFAContext *excludeCtx) +{ + RPRNFAContext *ctx; + RPRNFAContext *next; + + for (ctx = winstate->nfaContext; ctx != NULL; ctx = next) + { + CHECK_FOR_INTERRUPTS(); + + next = ctx->next; + + /* Skip the target context and contexts still processing */ + if (ctx == excludeCtx || ctx->states != NULL) + continue; + + /* Skip successfully matched contexts (will be handled by SKIP logic) */ + if (ctx->matchEndRow >= ctx->matchStartRow) + continue; + + /* + * This is a failed context - count and remove it. Only count if it + * actually processed its start row. Contexts created for + * beyond-partition rows are silently removed. + */ + if (ctx->lastProcessedRow >= ctx->matchStartRow) + { + int64 failedLen = ctx->lastProcessedRow - ctx->matchStartRow + 1; + + ExecRPRRecordContextFailure(winstate, failedLen); + } + /* else: context was never processed (beyond-partition), just remove */ + + ExecRPRFreeContext(winstate, ctx); + } +} + +/* + * ExecRPRFinalizeAllContexts + * + * Finalize all active contexts when partition ends. + * Match with NULL to force mismatch, then advance to process epsilon transitions. + */ +void +ExecRPRFinalizeAllContexts(WindowAggState *winstate, int64 lastPos) +{ + RPRNFAContext *ctx; + + for (ctx = winstate->nfaContext; ctx != NULL; ctx = ctx->next) + { + CHECK_FOR_INTERRUPTS(); + + if (ctx->states != NULL) + { + nfa_match(winstate, ctx, NULL); + nfa_advance(winstate, ctx, lastPos); + } + } +} diff --git a/src/backend/executor/meson.build b/src/backend/executor/meson.build index dc45be0b2ce..0ff4a5b1d83 100644 --- a/src/backend/executor/meson.build +++ b/src/backend/executor/meson.build @@ -13,6 +13,7 @@ backend_sources += files( 'execParallel.c', 'execPartition.c', 'execProcnode.c', + 'execRPR.c', 'execReplication.c', 'execSRF.c', 'execScan.c', diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index f52a7aae843..93cb9bbdd11 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -36,15 +36,19 @@ #include "access/htup_details.h" #include "catalog/objectaccess.h" #include "catalog/pg_aggregate.h" +#include "common/int.h" #include "catalog/pg_proc.h" #include "common/int.h" #include "executor/executor.h" +#include "executor/execRPR.h" #include "executor/instrument.h" #include "executor/nodeWindowAgg.h" #include "miscadmin.h" #include "nodes/nodeFuncs.h" +#include "nodes/plannodes.h" #include "optimizer/clauses.h" #include "optimizer/optimizer.h" +#include "optimizer/rpr.h" #include "parser/parse_agg.h" #include "parser/parse_coerce.h" #include "utils/acl.h" @@ -173,6 +177,7 @@ typedef struct WindowStatePerAggData bool restart; /* need to restart this agg in this cycle? */ } WindowStatePerAggData; + static void initialize_windowaggregate(WindowAggState *winstate, WindowStatePerFunc perfuncstate, WindowStatePerAgg peraggstate); @@ -209,6 +214,9 @@ static Datum GetAggInitVal(Datum textInitVal, Oid transtype); static bool are_peers(WindowAggState *winstate, TupleTableSlot *slot1, TupleTableSlot *slot2); +static int WinGetSlotInFrame(WindowObject winobj, TupleTableSlot *slot, + int relpos, int seektype, bool set_mark, + bool *isnull, bool *isout); static bool window_gettupleslot(WindowObject winobj, int64 pos, TupleTableSlot *slot); @@ -227,6 +235,19 @@ static uint8 get_notnull_info(WindowObject winobj, int64 pos, int argno); static void put_notnull_info(WindowObject winobj, int64 pos, int argno, bool isnull); +static bool rpr_is_defined(WindowAggState *winstate); +static int64 row_is_in_reduced_frame(WindowObject winobj, int64 pos); + +static void clear_reduced_frame(WindowAggState *winstate); +static int get_reduced_frame_status(WindowAggState *winstate, int64 pos); +static void update_reduced_frame(WindowObject winobj, int64 pos); + +/* Forward declarations - NFA row evaluation */ +static bool nfa_evaluate_row(WindowObject winobj, int64 pos, bool *varMatched); + +/* Forward declarations - navigation offset evaluation */ +static void eval_nav_max_offset(WindowAggState *winstate, List *defineClause); +static void eval_nav_first_offset(WindowAggState *winstate, List *defineClause); /* * Not null info bit array consists of 2-bit items @@ -820,6 +841,9 @@ eval_windowaggregates(WindowAggState *winstate) * transition function, or * - we have an EXCLUSION clause, or * - if the new frame doesn't overlap the old one + * - if RPR (Row Pattern Recognition) is enabled, because the reduced + * frame depends on pattern matching results which can differ entirely + * from row to row, making inverse transition optimization inapplicable * * Note that we don't strictly need to restart in the last case, but if * we're going to remove all rows from the aggregation anyway, a restart @@ -834,7 +858,8 @@ eval_windowaggregates(WindowAggState *winstate) (winstate->aggregatedbase != winstate->frameheadpos && !OidIsValid(peraggstate->invtransfn_oid)) || (winstate->frameOptions & FRAMEOPTION_EXCLUSION) || - winstate->aggregatedupto <= winstate->frameheadpos) + winstate->aggregatedupto <= winstate->frameheadpos || + rpr_is_defined(winstate)) { peraggstate->restart = true; numaggs_restart++; @@ -963,6 +988,14 @@ eval_windowaggregates(WindowAggState *winstate) { winstate->aggregatedupto = winstate->frameheadpos; ExecClearTuple(agg_row_slot); + + /* + * If RPR is defined, we do not use aggregatedupto_nonrestarted. To + * avoid assertion failure below, we reset aggregatedupto_nonrestarted + * to frameheadpos. + */ + if (rpr_is_defined(winstate)) + aggregatedupto_nonrestarted = winstate->frameheadpos; } /* @@ -974,7 +1007,7 @@ eval_windowaggregates(WindowAggState *winstate) */ for (;;) { - int ret; + int64 ret; /* Fetch next row if we didn't already */ if (TupIsNull(agg_row_slot)) @@ -992,9 +1025,40 @@ eval_windowaggregates(WindowAggState *winstate) agg_row_slot, false); if (ret < 0) break; + if (ret == 0) goto next_tuple; + if (rpr_is_defined(winstate)) + { + /* + * If currentpos is already decided but aggregatedupto is not yet + * determined, we've passed the last reduced frame. + */ + if (get_reduced_frame_status(winstate, winstate->currentpos) + != RF_NOT_DETERMINED && + get_reduced_frame_status(winstate, winstate->aggregatedupto) + == RF_NOT_DETERMINED) + break; + + /* + * Calculate the reduced frame for aggregatedupto. + */ + ret = row_is_in_reduced_frame(winstate->agg_winobj, + winstate->aggregatedupto); + if (ret == -1) /* unmatched row */ + break; + + /* + * Check if current row is inside a match but not the head + * (skipped), and it's the base row for aggregation. + */ + if (get_reduced_frame_status(winstate, + winstate->aggregatedupto) == RF_SKIPPED && + winstate->aggregatedupto == winstate->aggregatedbase) + break; + } + /* Set tuple context for evaluation of aggregate arguments */ winstate->tmpcontext->ecxt_outertuple = agg_row_slot; @@ -1023,6 +1087,7 @@ next_tuple: ExecClearTuple(agg_row_slot); } + /* The frame's end is not supposed to move backwards, ever */ Assert(aggregatedupto_nonrestarted <= winstate->aggregatedupto); @@ -1190,6 +1255,28 @@ prepare_tuplestore(WindowAggState *winstate) } } + /* Create read/mark pointers for RPR navigation if needed */ + if (winstate->nav_winobj) + { + /* + * Allocate mark and read pointers for RPR navigation. + * + * If navMaxOffsetKind == RPR_NAV_OFFSET_FIXED, we advance the mark + * based on (currentpos - navMaxOffset) and optionally + * (nfaContext->matchStartRow + navFirstOffset), allowing + * tuplestore_trim() to free rows that are no longer reachable. + * + * RPR_NAV_OFFSET_NEEDS_EVAL is resolved at executor init; by this + * point it is either FIXED or RETAIN_ALL. + */ + winstate->nav_winobj->markptr = + tuplestore_alloc_read_pointer(winstate->buffer, 0); + winstate->nav_winobj->readptr = + tuplestore_alloc_read_pointer(winstate->buffer, + EXEC_FLAG_BACKWARD); + winstate->nav_winobj->markpos = 0; + } + /* * If we are in RANGE or GROUPS mode, then determining frame boundaries * requires physical access to the frame endpoint rows, except in certain @@ -1246,6 +1333,8 @@ begin_partition(WindowAggState *winstate) winstate->framehead_valid = false; winstate->frametail_valid = false; winstate->grouptail_valid = false; + if (rpr_is_defined(winstate)) + clear_reduced_frame(winstate); winstate->spooled_rows = 0; winstate->currentpos = 0; winstate->frameheadpos = 0; @@ -1299,6 +1388,13 @@ begin_partition(WindowAggState *winstate) winstate->aggregatedupto = 0; } + /* reset mark and seek positions for RPR navigation */ + if (winstate->nav_winobj) + { + winstate->nav_winobj->markpos = -1; + winstate->nav_winobj->seekpos = -1; + } + /* reset mark and seek positions for each real window function */ for (int i = 0; i < numfuncs; i++) { @@ -1467,6 +1563,18 @@ release_partition(WindowAggState *winstate) tuplestore_clear(winstate->buffer); winstate->partition_spooled = false; winstate->next_partition = true; + + /* Reset RPR match results */ + clear_reduced_frame(winstate); + + /* Reset NFA state for new partition */ + winstate->nfaContext = NULL; + winstate->nfaContextTail = NULL; + winstate->nfaContextFree = NULL; + winstate->nfaStateFree = NULL; + winstate->nfaLastProcessedRow = -1; + winstate->nfaStatesActive = 0; + winstate->nfaContextsActive = 0; } /* @@ -2395,6 +2503,16 @@ ExecWindowAgg(PlanState *pstate) /* don't evaluate the window functions when we're in pass-through mode */ if (winstate->status == WINDOWAGG_RUN) { + /* + * If RPR is defined and skip mode is next row, clear the current + * match so the next row triggers re-evaluation. + */ + if (rpr_is_defined(winstate)) + { + if (winstate->rpSkipTo == ST_NEXT_ROW) + clear_reduced_frame(winstate); + } + /* * Evaluate true window functions */ @@ -2434,6 +2552,45 @@ ExecWindowAgg(PlanState *pstate) if (winstate->grouptail_ptr >= 0) update_grouptailpos(winstate); + /* + * Advance RPR navigation mark pointer if possible, so that + * tuplestore_trim() can free rows no longer reachable by navigation. + */ + if (winstate->nav_winobj && + winstate->rpPattern != NULL && + winstate->navMaxOffsetKind == RPR_NAV_OFFSET_FIXED) + { + int64 navmarkpos; + + /* Backward reach from PREV/LAST/compound PREV_LAST/NEXT_LAST */ + if (winstate->currentpos > winstate->navMaxOffset) + navmarkpos = winstate->currentpos - winstate->navMaxOffset; + else + navmarkpos = 0; + + /* + * If FIRST is used, also consider match_start + navFirstOffset. + * The oldest active context (nfaContext) has the smallest + * matchStartRow. + */ + if (winstate->hasFirstNav && + winstate->navFirstOffsetKind == RPR_NAV_OFFSET_FIXED && + winstate->nfaContext != NULL) + { + int64 firstreach; + + if (winstate->navFirstOffset > -winstate->nfaContext->matchStartRow) + firstreach = winstate->nfaContext->matchStartRow + + winstate->navFirstOffset; + else + firstreach = 0; + navmarkpos = Min(navmarkpos, firstreach); + } + + if (navmarkpos > winstate->nav_winobj->markpos) + WinSetMarkPosition(winstate->nav_winobj, navmarkpos); + } + /* * Truncate any no-longer-needed rows from the tuplestore. */ @@ -2659,6 +2816,20 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) winstate->temp_slot_2 = ExecInitExtraTupleSlot(estate, scanDesc, &TTSOpsMinimalTuple); + if (node->rpPattern != NULL) + { + winstate->nav_slot = ExecInitExtraTupleSlot(estate, scanDesc, + &TTSOpsMinimalTuple); + winstate->nav_slot_pos = -1; + + winstate->nav_null_slot = ExecInitExtraTupleSlot(estate, scanDesc, + &TTSOpsMinimalTuple); + winstate->nav_null_slot = ExecStoreAllNullTuple(winstate->nav_null_slot); + + winstate->nav_saved_outertuple = NULL; + winstate->nav_match_start = 0; + } + /* * create frame head and tail slots only if needed (must create slots in * exactly the same cases that update_frameheadpos and update_frametailpos @@ -2827,6 +2998,23 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) winstate->agg_winobj = agg_winobj; } + /* + * Set up WindowObject for RPR navigation opcodes. This is separate from + * agg_winobj because it needs its own read pointer to avoid interfering + * with aggregate processing. + */ + if (node->rpPattern != NULL) + { + WindowObject nav_winobj = makeNode(WindowObjectData); + + nav_winobj->winstate = winstate; + nav_winobj->argstates = NIL; + nav_winobj->localmem = NULL; + nav_winobj->markptr = -1; + nav_winobj->readptr = -1; + winstate->nav_winobj = nav_winobj; + } + /* Set the status to running */ winstate->status = WINDOWAGG_RUN; @@ -2845,6 +3033,81 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) winstate->inRangeAsc = node->inRangeAsc; winstate->inRangeNullsFirst = node->inRangeNullsFirst; + /* Set up SKIP TO type */ + winstate->rpSkipTo = node->rpSkipTo; + /* Set up row pattern recognition PATTERN clause (compiled NFA) */ + winstate->rpPattern = node->rpPattern; + /* Set up nav offsets for tuplestore trim */ + winstate->navMaxOffsetKind = node->navMaxOffsetKind; + winstate->navMaxOffset = node->navMaxOffset; + if (winstate->navMaxOffsetKind == RPR_NAV_OFFSET_NEEDS_EVAL) + eval_nav_max_offset(winstate, node->defineClause); + winstate->hasFirstNav = node->hasFirstNav; + winstate->navFirstOffsetKind = node->navFirstOffsetKind; + winstate->navFirstOffset = node->navFirstOffset; + if (winstate->hasFirstNav && + winstate->navFirstOffsetKind == RPR_NAV_OFFSET_NEEDS_EVAL) + eval_nav_first_offset(winstate, node->defineClause); + + /* Copy match_start dependency bitmapset for per-context evaluation */ + winstate->defineMatchStartDependent = bms_copy(node->defineMatchStartDependent); + + /* Calculate NFA state size and allocate cycle detection bitmap */ + if (node->rpPattern != NULL) + { + winstate->nfaStateSize = offsetof(RPRNFAState, counts) + + sizeof(int32) * node->rpPattern->maxDepth; + winstate->nfaVisitedNWords = + (node->rpPattern->numElements - 1) / BITS_PER_BITMAPWORD + 1; + winstate->nfaVisitedElems = palloc0(sizeof(bitmapword) * + winstate->nfaVisitedNWords); + } + + /* Set up row pattern recognition DEFINE clause */ + winstate->defineVariableList = NIL; + winstate->defineClauseList = NIL; + if (node->defineClause != NIL) + { + /* + * Compile DEFINE clause expressions. PREV/NEXT navigation is handled + * by EEOP_RPR_NAV_SET/RESTORE opcodes emitted during ExecInitExpr, so + * no varno rewriting is needed here. + */ + foreach(l, node->defineClause) + { + TargetEntry *te = lfirst(l); + char *name = te->resname; + Expr *expr = te->expr; + ExprState *exps; + + winstate->defineVariableList = + lappend(winstate->defineVariableList, + makeString(pstrdup(name))); + exps = ExecInitExpr(expr, (PlanState *) winstate); + winstate->defineClauseList = + lappend(winstate->defineClauseList, exps); + } + } + + /* Initialize NFA free lists for row pattern matching */ + winstate->nfaContext = NULL; + winstate->nfaContextTail = NULL; + winstate->nfaContextFree = NULL; + winstate->nfaStateFree = NULL; + winstate->nfaLastProcessedRow = -1; + winstate->nfaStatesActive = 0; + winstate->nfaContextsActive = 0; + + /* + * Allocate varMatched array for NFA evaluation. With the new varNames + * ordering (DEFINE order first), varId == defineIdx for all defined + * variables, so no mapping is needed. + */ + if (list_length(winstate->defineVariableList) > 0) + winstate->nfaVarMatched = palloc0(sizeof(bool) * + list_length(winstate->defineVariableList)); + else + winstate->nfaVarMatched = NULL; winstate->all_first = true; winstate->partition_spooled = false; winstate->more_partitions = false; @@ -2853,6 +3116,42 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) return winstate; } +/* + * ExecRPRNavGetSlot + * + * Fetch tuple at given position for RPR navigation opcodes. + * Returns nav_slot with the tuple loaded, or nav_null_slot if out of range. + */ +TupleTableSlot * +ExecRPRNavGetSlot(WindowAggState *winstate, int64 pos) +{ + WindowObject winobj = winstate->nav_winobj; + TupleTableSlot *slot = winstate->nav_slot; + + if (pos < 0) + return winstate->nav_null_slot; + + /* + * If nav_slot already holds this position, return it without re-fetching. + * This is critical when multiple PREV/NEXT calls in the same expression + * navigate to the same row, because re-fetching would free the slot's + * tuple memory and invalidate any pass-by-ref Datum pointers from earlier + * navigation results. + */ + if (winstate->nav_slot_pos == pos) + return slot; + + if (!window_gettupleslot(winobj, pos, slot)) + { + winstate->nav_slot_pos = -1; + return winstate->nav_null_slot; + } + + winstate->nav_slot_pos = pos; + return slot; +} + + /* ----------------- * ExecEndWindowAgg * ----------------- @@ -2910,6 +3209,8 @@ ExecReScanWindowAgg(WindowAggState *node) ExecClearTuple(node->agg_row_slot); ExecClearTuple(node->temp_slot_1); ExecClearTuple(node->temp_slot_2); + if (node->nav_slot) + ExecClearTuple(node->nav_slot); if (node->framehead_slot) ExecClearTuple(node->framehead_slot); if (node->frametail_slot) @@ -3270,7 +3571,8 @@ window_gettupleslot(WindowObject winobj, int64 pos, TupleTableSlot *slot) return false; if (pos < winobj->markpos) - elog(ERROR, "cannot fetch row before WindowObject's mark position"); + elog(ERROR, "cannot fetch row: " INT64_FORMAT " before WindowObject's mark position: " INT64_FORMAT, + pos, winobj->markpos); oldcontext = MemoryContextSwitchTo(winstate->ss.ps.ps_ExprContext->ecxt_per_query_memory); @@ -3387,6 +3689,7 @@ ignorenulls_getfuncarginframe(WindowObject winobj, int argno, int notnull_offset; int notnull_relpos; int forward; + int64 num_reduced_frame; Assert(WindowObjectIsValid(winobj)); winstate = winobj->winstate; @@ -3415,6 +3718,13 @@ ignorenulls_getfuncarginframe(WindowObject winobj, int argno, /* rejecting relpos > 0 is easy and simplifies code below */ if (relpos > 0) goto out_of_frame; + + /* + * RPR cares about frame head pos. Need to call + * update_frameheadpos + */ + update_frameheadpos(winstate); + update_frametailpos(winstate); abs_pos = winstate->frametailpos - 1; mark_pos = 0; /* keep compiler quiet */ @@ -3430,6 +3740,35 @@ ignorenulls_getfuncarginframe(WindowObject winobj, int argno, * Get the next nonnull value in the frame, moving forward or backward * until we find a value or reach the frame's end. */ + + /* + * Check whether current row is in reduced frame. + */ + num_reduced_frame = row_is_in_reduced_frame(winobj, winstate->frameheadpos); + if (num_reduced_frame < 0) /* unmatched or skipped row */ + goto out_of_frame; + else if (num_reduced_frame > 0) /* the first row of the reduced frame */ + { + /* + * Early check if row could be out of reduced frame. When RPR is + * enabled, EXCLUDE clause cannot be specified and the frame is always + * contiguous. So we can safely perform the following checks. Note, + * however, it is possible that a row is out of reduced frame if + * there's a NULL in the middle. So we need to check it in the + * following do loop. + */ + if (seektype == WINDOW_SEEK_HEAD && relpos >= num_reduced_frame) + goto out_of_frame; + if (seektype == WINDOW_SEEK_TAIL) + { + if (notnull_relpos >= num_reduced_frame) + goto out_of_frame; + + /* not out of reduced frame. Set abspos as a starting point */ + abs_pos = winstate->frameheadpos + num_reduced_frame - 1; + } + } + do { int inframe; @@ -3491,6 +3830,16 @@ ignorenulls_getfuncarginframe(WindowObject winobj, int argno, } advance: abs_pos += forward; + if (rpr_is_defined(winstate)) + { + /* + * Check whether we are still in the reduced frame. (also check + * if we succeeded in getting the target row). + */ + num_reduced_frame--; + if (num_reduced_frame <= 0 && notnull_offset <= notnull_relpos) + goto out_of_frame; + } } while (notnull_offset <= notnull_relpos); if (set_mark) @@ -3612,6 +3961,631 @@ put_notnull_info(WindowObject winobj, int64 pos, int argno, bool isnull) mbp[bpos] = mb; } +/* + * eval_nav_offset_helper + * Evaluate an offset expression at executor init time for trim + * optimization. Returns the offset value, or 0 for NULL/negative + * (these will cause a runtime error during actual navigation, so the + * trim value is irrelevant). + */ +static int64 +eval_nav_offset_helper(WindowAggState *winstate, Expr *offset_expr, + int64 defaultOffset) +{ + ExprContext *econtext = winstate->ss.ps.ps_ExprContext; + ExprState *estate; + Datum val; + bool isnull; + int64 offset; + + if (offset_expr == NULL) + return defaultOffset; + + estate = ExecInitExpr(offset_expr, (PlanState *) winstate); + val = ExecEvalExprSwitchContext(estate, econtext, &isnull); + + if (isnull) + return 0; + + offset = DatumGetInt64(val); + if (offset < 0) + return 0; + + return offset; +} + +typedef struct +{ + WindowAggState *winstate; + int64 maxOffset; + bool overflow; /* true if overflow detected */ +} EvalNavMaxContext; + +/* + * eval_nav_max_offset_walker + * Walk expression tree evaluating backward-reach offsets at runtime. + * + * Handles simple PREV, LAST-with-offset, and compound PREV_LAST/NEXT_LAST. + */ +static bool +eval_nav_max_offset_walker(Node *node, void *ctx) +{ + EvalNavMaxContext *context = (EvalNavMaxContext *) ctx; + + if (node == NULL) + return false; + + /* Short-circuit if overflow already detected */ + if (context->overflow) + return false; + + if (IsA(node, RPRNavExpr)) + { + RPRNavExpr *nav = (RPRNavExpr *) node; + int64 reach = 0; + + if (nav->kind == RPR_NAV_PREV) + { + reach = eval_nav_offset_helper(context->winstate, + nav->offset_arg, 1); + } + else if (nav->kind == RPR_NAV_LAST && nav->offset_arg != NULL) + { + reach = eval_nav_offset_helper(context->winstate, + nav->offset_arg, 0); + } + else if (nav->kind == RPR_NAV_PREV_LAST || + nav->kind == RPR_NAV_NEXT_LAST) + { + int64 inner = eval_nav_offset_helper(context->winstate, + nav->offset_arg, 0); + int64 outer = eval_nav_offset_helper(context->winstate, + nav->compound_offset_arg, 1); + + if (nav->kind == RPR_NAV_PREV_LAST) + { + if (pg_add_s64_overflow(inner, outer, &reach)) + { + context->overflow = true; + return false; + } + } + else + reach = (inner > outer) ? inner - outer : 0; + } + + context->maxOffset = Max(context->maxOffset, reach); + + return false; /* don't walk into children */ + } + + return expression_tree_walker(node, eval_nav_max_offset_walker, ctx); +} + +/* + * eval_nav_max_offset + * Evaluate non-constant backward-reach offsets at executor init time. + * + * Called when the planner set navMaxOffsetKind to RPR_NAV_OFFSET_NEEDS_EVAL + * because some offset in PREV, LAST-with-offset, or compound PREV_LAST/ + * NEXT_LAST contains a parameter or non-foldable expression. + * + * On overflow, sets navMaxOffsetKind to RPR_NAV_OFFSET_RETAIN_ALL so that + * tuplestore trim is disabled for backward navigation. + */ +static void +eval_nav_max_offset(WindowAggState *winstate, List *defineClause) +{ + EvalNavMaxContext ctx; + ListCell *lc; + + ctx.winstate = winstate; + ctx.maxOffset = 0; + ctx.overflow = false; + + foreach(lc, defineClause) + { + TargetEntry *te = (TargetEntry *) lfirst(lc); + + eval_nav_max_offset_walker((Node *) te->expr, &ctx); + } + + if (ctx.overflow) + { + winstate->navMaxOffsetKind = RPR_NAV_OFFSET_RETAIN_ALL; + winstate->navMaxOffset = 0; + } + else + { + winstate->navMaxOffsetKind = RPR_NAV_OFFSET_FIXED; + winstate->navMaxOffset = ctx.maxOffset; + } +} + +typedef struct +{ + WindowAggState *winstate; + int64 minOffset; + bool found; +} EvalNavFirstContext; + +/* + * eval_nav_first_offset_walker + * Walk expression tree evaluating forward-from-match_start offsets. + * + * Handles simple FIRST and compound PREV_FIRST/NEXT_FIRST. + */ +static bool +eval_nav_first_offset_walker(Node *node, void *ctx) +{ + EvalNavFirstContext *context = (EvalNavFirstContext *) ctx; + + if (node == NULL) + return false; + + if (IsA(node, RPRNavExpr)) + { + RPRNavExpr *nav = (RPRNavExpr *) node; + int64 combined = INT64_MAX; + + if (nav->kind == RPR_NAV_FIRST) + { + context->found = true; + combined = eval_nav_offset_helper(context->winstate, + nav->offset_arg, 0); + } + else if (nav->kind == RPR_NAV_PREV_FIRST || + nav->kind == RPR_NAV_NEXT_FIRST) + { + int64 inner = eval_nav_offset_helper(context->winstate, + nav->offset_arg, 0); + int64 outer = eval_nav_offset_helper(context->winstate, + nav->compound_offset_arg, 1); + + context->found = true; + if (nav->kind == RPR_NAV_PREV_FIRST) + { + /* + * combined = inner - outer. Both are non-negative, so the + * result >= -INT64_MAX, which cannot underflow int64. + */ + combined = inner - outer; + } + else + { + /* + * NEXT_FIRST: combined = inner + outer. This can overflow, + * but the result is always >= 0, so it never updates + * minOffset (which tracks the minimum). Clamp to INT64_MAX + * on overflow. + */ + if (pg_add_s64_overflow(inner, outer, &combined)) + combined = INT64_MAX; + } + } + + context->minOffset = Min(context->minOffset, combined); + + return false; + } + + return expression_tree_walker(node, eval_nav_first_offset_walker, ctx); +} + +/* + * eval_nav_first_offset + * Evaluate non-constant forward-from-match_start offsets at executor + * init time. + * + * Called when the planner set navFirstOffsetKind to RPR_NAV_OFFSET_NEEDS_EVAL + * because some offset in FIRST or compound PREV_FIRST/NEXT_FIRST contains + * a parameter or non-foldable expression. + */ +static void +eval_nav_first_offset(WindowAggState *winstate, List *defineClause) +{ + EvalNavFirstContext ctx; + ListCell *lc; + + ctx.winstate = winstate; + ctx.minOffset = INT64_MAX; + ctx.found = false; + + foreach(lc, defineClause) + { + TargetEntry *te = (TargetEntry *) lfirst(lc); + + eval_nav_first_offset_walker((Node *) te->expr, &ctx); + } + + if (ctx.found && ctx.minOffset < INT64_MAX) + { + winstate->navFirstOffsetKind = RPR_NAV_OFFSET_FIXED; + winstate->navFirstOffset = ctx.minOffset; + } + else + { + winstate->navFirstOffsetKind = RPR_NAV_OFFSET_FIXED; + winstate->navFirstOffset = 0; + } +} + +/* + * rpr_is_defined + * Return true if row pattern recognition is defined. + */ +static bool +rpr_is_defined(WindowAggState *winstate) +{ + return winstate->rpPattern != NULL; +} + +/* + * ----------------- + * row_is_in_reduced_frame + * Determine whether a row is in the current row's reduced window frame + * according to row pattern matching + * + * The row must have already been determined to be in a full window frame + * and fetched into the slot. + * + * Returns: + * = 0, RPR is not defined. + * >0, if the row is the first in the reduced frame. Return the number of rows + * in the reduced frame. + * -1, if the row is an unmatched row + * -2, if the row is in the reduced frame but needed to be skipped because of + * AFTER MATCH SKIP PAST LAST ROW + * ----------------- + */ +static int64 +row_is_in_reduced_frame(WindowObject winobj, int64 pos) +{ + WindowAggState *winstate = winobj->winstate; + int state; + int64 rtn; + + if (!rpr_is_defined(winstate)) + { + /* + * RPR is not defined. Assume that we are always in the reduced window + * frame. + */ + rtn = 0; + return rtn; + } + + state = get_reduced_frame_status(winstate, pos); + + if (state == RF_NOT_DETERMINED) + { + update_frameheadpos(winstate); + update_reduced_frame(winobj, pos); + } + + state = get_reduced_frame_status(winstate, pos); + + switch (state) + { + case RF_FRAME_HEAD: + rtn = winstate->rpr_match_length; + break; + + case RF_SKIPPED: + rtn = -2; + break; + + case RF_UNMATCHED: + case RF_EMPTY_MATCH: + rtn = -1; + break; + + default: + elog(ERROR, "unrecognized state: %d at: " INT64_FORMAT, + state, pos); + break; + } + + return rtn; +} + +/* + * clear_reduced_frame + * Clear reduced frame status + */ +static void +clear_reduced_frame(WindowAggState *winstate) +{ + winstate->rpr_match_valid = false; + winstate->rpr_match_matched = false; + winstate->rpr_match_start = -1; + winstate->rpr_match_length = 0; +} + +/* + * get_reduced_frame_status + * Look up a position against the current match. + * + * Returns one of the RF_* constants: + * RF_NOT_DETERMINED pos has not been processed yet + * RF_FRAME_HEAD pos is the start of the current match + * RF_SKIPPED pos is inside the current match but not the start + * RF_UNMATCHED pos is processed but not part of any match + */ +static int +get_reduced_frame_status(WindowAggState *winstate, int64 pos) +{ + int64 start = winstate->rpr_match_start; + int64 length = winstate->rpr_match_length; + + if (!winstate->rpr_match_valid) + return RF_NOT_DETERMINED; + + /* Empty match: covers only the start position */ + if (pos == start && winstate->rpr_match_matched && length == 0) + return RF_EMPTY_MATCH; + + /* Outside the result range */ + if (pos < start || pos >= start + length) + return RF_NOT_DETERMINED; + + if (!winstate->rpr_match_matched) + return RF_UNMATCHED; + + if (pos == start) + return RF_FRAME_HEAD; + + return RF_SKIPPED; +} + +/* + * update_reduced_frame + * Update reduced frame info using multi-context NFA pattern matching. + * + * Maintains multiple NFA contexts simultaneously, one for each potential + * match start position. This allows sharing row evaluations across contexts, + * avoiding redundant DEFINE clause evaluations when rewinding for SKIP TO + * NEXT ROW mode. + * + * Key optimizations: + * - Row evaluations (expensive DEFINE clauses) happen only once per row + * - All active contexts share the same evaluation results + * - Contexts persist across calls, enabling O(n) DEFINE evaluations + */ +static void +update_reduced_frame(WindowObject winobj, int64 pos) +{ + WindowAggState *winstate = winobj->winstate; + RPRNFAContext *targetCtx; + int64 currentPos; + int64 startPos; + int frameOptions = winstate->frameOptions; + bool hasLimitedFrame; + int64 frameOffset = 0; + int64 matchLen; + + /* + * Check if we have a limited frame (ROWS ... N FOLLOWING). Each context + * needs its own frame end based on matchStartRow + offset. + */ + hasLimitedFrame = (frameOptions & FRAMEOPTION_ROWS) && + !(frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING); + if (hasLimitedFrame && winstate->endOffsetValue != 0) + frameOffset = DatumGetInt64(winstate->endOffsetValue); + + /* + * Case 1: pos is before any existing context's start position. This means + * the position was already processed and determined unmatched. Head is + * the oldest context (lowest matchStartRow) since contexts are added at + * tail with increasing positions. + */ + if (winstate->nfaContext != NULL && + pos < winstate->nfaContext->matchStartRow) + { + /* already processed, unmatched */ + winstate->rpr_match_valid = true; + winstate->rpr_match_matched = false; + winstate->rpr_match_start = pos; + winstate->rpr_match_length = 1; + return; + } + + /* + * Case 2: Find existing context for this pos, or create new one. + */ + targetCtx = ExecRPRGetHeadContext(winstate, pos); + if (targetCtx == NULL) + { + /* + * No context exists. If pos is already processed, it means this row + * was already determined to be unmatched or skipped - no need to + * reprocess. + */ + if (pos <= winstate->nfaLastProcessedRow) + { + /* already processed, unmatched */ + winstate->rpr_match_valid = true; + winstate->rpr_match_matched = false; + winstate->rpr_match_start = pos; + winstate->rpr_match_length = 1; + return; + } + /* Not yet processed - create new context and start fresh */ + targetCtx = ExecRPRStartContext(winstate, pos); + } + else if (targetCtx->states == NULL) + { + /* Context already completed - skip to result registration */ + goto register_result; + } + + /* + * Determine where to start processing. Usually nfaLastProcessedRow+1 >= + * pos since contexts are created at currentPos+1 during processing. + * However, pos can exceed this when rows are skipped (e.g., unmatched + * rows don't update nfaLastProcessedRow). + */ + startPos = Max(pos, winstate->nfaLastProcessedRow + 1); + + /* + * Process rows until target context completes or we hit boundaries. Each + * row evaluation is shared across all active contexts. + */ + for (currentPos = startPos; targetCtx->states != NULL; currentPos++) + { + bool rowExists; + + /* + * Evaluate variables for this row - done only once, shared by all + * contexts. + * + * Set nav_match_start to the head context's matchStartRow for + * FIRST/LAST navigation. Match_start-dependent variables (FIRST, + * LAST-with-offset) are re-evaluated per-context in ExecRPRProcessRow + * when matchStartRow differs. + */ + winstate->nav_match_start = targetCtx->matchStartRow; + rowExists = nfa_evaluate_row(winobj, currentPos, winstate->nfaVarMatched); + + /* No more rows in partition? Finalize all contexts */ + if (!rowExists) + { + ExecRPRFinalizeAllContexts(winstate, currentPos - 1); + /* Clean up dead contexts from finalization */ + ExecRPRCleanupDeadContexts(winstate, targetCtx); + break; + } + + /* Update last processed row */ + winstate->nfaLastProcessedRow = currentPos; + + /*-------------------------- + * Process all contexts for this row: + * 1. Match all (convergence) + * 2. Absorb redundant + * 3. Advance all (divergence) + */ + ExecRPRProcessRow(winstate, currentPos, hasLimitedFrame, frameOffset); + + /* + * Create a new context for the next potential start position. This + * enables overlapping match detection for SKIP TO NEXT ROW. + */ + ExecRPRStartContext(winstate, currentPos + 1); + + /* + * Clean up dead contexts (failed with no active states and no match). + * This removes contexts that failed during processing and counts them + * appropriately as pruned or mismatched. + */ + ExecRPRCleanupDeadContexts(winstate, targetCtx); + } + +register_result: + Assert(pos == targetCtx->matchStartRow); + + /* + * Record match result. + */ + winstate->rpr_match_valid = true; + winstate->rpr_match_start = targetCtx->matchStartRow; + + if (targetCtx->matchEndRow < targetCtx->matchStartRow) + { + matchLen = targetCtx->lastProcessedRow - targetCtx->matchStartRow + 1; + + if (targetCtx->matchedState != NULL) + { + /* Empty match: FIN reached but 0 rows consumed */ + winstate->rpr_match_matched = true; + winstate->rpr_match_length = 0; + ExecRPRRecordContextSuccess(winstate, 0); + } + else + { + /* No match */ + winstate->rpr_match_matched = false; + winstate->rpr_match_length = 1; + ExecRPRRecordContextFailure(winstate, matchLen); + } + ExecRPRFreeContext(winstate, targetCtx); + return; + } + + /* Match succeeded */ + matchLen = targetCtx->matchEndRow - targetCtx->matchStartRow + 1; + + winstate->rpr_match_matched = true; + winstate->rpr_match_length = matchLen; + ExecRPRRecordContextSuccess(winstate, matchLen); + + /* Remove the matched context */ + ExecRPRFreeContext(winstate, targetCtx); +} + +/* + * nfa_evaluate_row + * + * Evaluate all DEFINE variables for current row. + * Returns true if the row exists, false if out of partition. + * If row exists, fills varMatched array. + * varMatched[i] = true if variable i matched at current row. + * + * Uses 1-slot model: only ecxt_outertuple is set to the current row. + * PREV/NEXT/FIRST/LAST navigation is handled by EEOP_RPR_NAV_SET/RESTORE + * opcodes during expression evaluation, which temporarily swap the slot. + */ +static bool +nfa_evaluate_row(WindowObject winobj, int64 pos, bool *varMatched) +{ + WindowAggState *winstate = winobj->winstate; + ExprContext *econtext = winstate->ss.ps.ps_ExprContext; + int numDefineVars = list_length(winstate->defineVariableList); + ListCell *lc; + int varIdx = 0; + TupleTableSlot *slot; + int64 saved_pos; + + /* Fetch current row into temp_slot_1 */ + slot = winstate->temp_slot_1; + if (!window_gettupleslot(winobj, pos, slot)) + return false; /* No row exists */ + + /* Set up 1-slot context: only ecxt_outertuple */ + econtext->ecxt_outertuple = slot; + + /* + * Save and set currentpos so that EEOP_RPR_NAV_SET opcodes can calculate + * target positions (currentpos +/- offset). + */ + saved_pos = winstate->currentpos; + winstate->currentpos = pos; + + /* Invalidate nav_slot cache so PREV/NEXT re-fetch for new row */ + winstate->nav_slot_pos = -1; + + foreach(lc, winstate->defineClauseList) + { + ExprState *exprState = (ExprState *) lfirst(lc); + Datum result; + bool isnull; + + /* Evaluate DEFINE expression */ + result = ExecEvalExpr(exprState, econtext, &isnull); + + varMatched[varIdx] = (!isnull && DatumGetBool(result)); + + varIdx++; + if (varIdx >= numDefineVars) + break; + } + + winstate->currentpos = saved_pos; + + return true; /* Row exists */ +} + + /*********************************************************************** * API exposed to window functions ***********************************************************************/ @@ -3972,8 +4946,6 @@ WinGetFuncArgInFrame(WindowObject winobj, int argno, WindowAggState *winstate; ExprContext *econtext; TupleTableSlot *slot; - int64 abs_pos; - int64 mark_pos; Assert(WindowObjectIsValid(winobj)); winstate = winobj->winstate; @@ -3984,6 +4956,48 @@ WinGetFuncArgInFrame(WindowObject winobj, int argno, return ignorenulls_getfuncarginframe(winobj, argno, relpos, seektype, set_mark, isnull, isout); + if (WinGetSlotInFrame(winobj, slot, + relpos, seektype, set_mark, + isnull, isout) == 0) + { + econtext->ecxt_outertuple = slot; + return ExecEvalExpr((ExprState *) list_nth(winobj->argstates, argno), + econtext, isnull); + } + + if (isout) + *isout = true; + *isnull = true; + return (Datum) 0; +} + +/* + * WinGetSlotInFrame + * slot: TupleTableSlot to store the result + * relpos: signed rowcount offset from the seek position + * seektype: WINDOW_SEEK_HEAD or WINDOW_SEEK_TAIL + * set_mark: If the row is found/in frame and set_mark is true, the mark is + * moved to the row as a side-effect. + * isnull: output argument, receives isnull status of result + * isout: output argument, set to indicate whether target row position + * is out of frame (can pass NULL if caller doesn't care about this) + * + * Returns 0 if we successfully got the slot, or nonzero if out of frame. + * (isout is also set in the latter case.) + */ +static int +WinGetSlotInFrame(WindowObject winobj, TupleTableSlot *slot, + int relpos, int seektype, bool set_mark, + bool *isnull, bool *isout) +{ + WindowAggState *winstate; + int64 abs_pos; + int64 mark_pos; + int64 num_reduced_frame; + + Assert(WindowObjectIsValid(winobj)); + winstate = winobj->winstate; + switch (seektype) { case WINDOW_SEEK_CURRENT: @@ -4050,11 +5064,25 @@ WinGetFuncArgInFrame(WindowObject winobj, int argno, winstate->frameOptions); break; } + num_reduced_frame = row_is_in_reduced_frame(winobj, + winstate->frameheadpos); + if (num_reduced_frame < 0) + goto out_of_frame; + else if (num_reduced_frame > 0) + if (relpos >= num_reduced_frame) + goto out_of_frame; break; case WINDOW_SEEK_TAIL: /* rejecting relpos > 0 is easy and simplifies code below */ if (relpos > 0) goto out_of_frame; + + /* + * RPR cares about frame head pos. Need to call + * update_frameheadpos + */ + update_frameheadpos(winstate); + update_frametailpos(winstate); abs_pos = winstate->frametailpos - 1 + relpos; @@ -4121,6 +5149,18 @@ WinGetFuncArgInFrame(WindowObject winobj, int argno, mark_pos = 0; /* keep compiler quiet */ break; } + + num_reduced_frame = row_is_in_reduced_frame(winobj, + winstate->frameheadpos); + if (num_reduced_frame < 0) + goto out_of_frame; + else if (num_reduced_frame > 0) + { + if (-relpos >= num_reduced_frame) + goto out_of_frame; + abs_pos = winstate->frameheadpos + relpos + + num_reduced_frame - 1; + } break; default: elog(ERROR, "unrecognized window seek type: %d", seektype); @@ -4138,16 +5178,24 @@ WinGetFuncArgInFrame(WindowObject winobj, int argno, if (isout) *isout = false; if (set_mark) + { + /* + * If RPR is enabled and seek type is WINDOW_SEEK_TAIL, we set the + * mark position unconditionally to frameheadpos. In this case the + * frame always starts at CURRENT_ROW and never goes back, thus + * setting the mark at the position is safe. + */ + if (winstate->rpPattern != NULL && seektype == WINDOW_SEEK_TAIL) + mark_pos = winstate->frameheadpos; WinSetMarkPosition(winobj, mark_pos); - econtext->ecxt_outertuple = slot; - return ExecEvalExpr((ExprState *) list_nth(winobj->argstates, argno), - econtext, isnull); + } + return 0; out_of_frame: if (isout) *isout = true; *isnull = true; - return (Datum) 0; + return -1; } /* diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c index 0e160b8502c..e42c5d65bb6 100644 --- a/src/backend/jit/llvm/llvmjit_expr.c +++ b/src/backend/jit/llvm/llvmjit_expr.c @@ -129,6 +129,9 @@ llvm_compile_expr(ExprState *state) LLVMValueRef v_aggvalues; LLVMValueRef v_aggnulls; + /* RPR navigation: when true, EEOP_OUTER_VAR reloads from econtext */ + bool has_rpr_nav; + instr_time starttime; instr_time deform_starttime; instr_time endtime; @@ -298,6 +301,36 @@ llvm_compile_expr(ExprState *state) FIELDNO_EXPRCONTEXT_AGGNULLS, "v.econtext.aggnulls"); + /* + * RPR navigation opcodes (PREV/NEXT) swap ecxt_outertuple to a different + * row mid-expression. The JIT code loads v_outervalues and v_outernulls + * once in the entry block and reuses them for all EEOP_OUTER_VAR steps. + * After a slot swap, these cached pointers become stale because the new + * slot has its own tts_values/tts_isnull arrays. + * + * When RPR navigation opcodes are present, EEOP_OUTER_VAR reloads the + * slot pointer from econtext->ecxt_outertuple on every access instead of + * using the cached entry-block values. This avoids the SSA/PHI + * complexity while keeping the rest of the expression JIT-compiled. + * Expressions without RPR navigation use the cached values as before. + */ + has_rpr_nav = false; + if (parent && IsA(parent, WindowAggState) && + ((WindowAgg *) parent->plan)->rpPattern != NULL) + { + for (int opno = 0; opno < state->steps_len; opno++) + { + ExprEvalOp opcode = ExecEvalStepOp(state, &state->steps[opno]); + + if (opcode == EEOP_RPR_NAV_SET || + opcode == EEOP_RPR_NAV_RESTORE) + { + has_rpr_nav = true; + break; + } + } + } + /* allocate blocks for each op upfront, so we can do jumps easily */ opblocks = palloc_array(LLVMBasicBlockRef, state->steps_len); for (int opno = 0; opno < state->steps_len; opno++) @@ -460,8 +493,37 @@ llvm_compile_expr(ExprState *state) } else if (opcode == EEOP_OUTER_VAR) { - v_values = v_outervalues; - v_nulls = v_outernulls; + if (has_rpr_nav) + { + /* + * RPR navigation swaps ecxt_outertuple + * mid-expression. Reload slot pointer from + * econtext on every access so we read from the + * current (possibly swapped) slot. + */ + LLVMValueRef v_tmpslot; + + v_tmpslot = l_load_struct_gep(b, + StructExprContext, + v_econtext, + FIELDNO_EXPRCONTEXT_OUTERTUPLE, + "v_outerslot_reload"); + v_values = l_load_struct_gep(b, + StructTupleTableSlot, + v_tmpslot, + FIELDNO_TUPLETABLESLOT_VALUES, + "v_outervalues_reload"); + v_nulls = l_load_struct_gep(b, + StructTupleTableSlot, + v_tmpslot, + FIELDNO_TUPLETABLESLOT_ISNULL, + "v_outernulls_reload"); + } + else + { + v_values = v_outervalues; + v_nulls = v_outernulls; + } } else if (opcode == EEOP_SCAN_VAR) { @@ -2434,6 +2496,18 @@ llvm_compile_expr(ExprState *state) LLVMBuildBr(b, opblocks[opno + 1]); break; + case EEOP_RPR_NAV_SET: + build_EvalXFunc(b, mod, "ExecEvalRPRNavSet", + v_state, op, v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_RPR_NAV_RESTORE: + build_EvalXFunc(b, mod, "ExecEvalRPRNavRestore", + v_state, op, v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + case EEOP_AGG_STRICT_DESERIALIZE: case EEOP_AGG_DESERIALIZE: { diff --git a/src/backend/jit/llvm/llvmjit_types.c b/src/backend/jit/llvm/llvmjit_types.c index c8a1f841293..e78b31d775f 100644 --- a/src/backend/jit/llvm/llvmjit_types.c +++ b/src/backend/jit/llvm/llvmjit_types.c @@ -168,6 +168,8 @@ void *referenced_functions[] = ExecEvalScalarArrayOp, ExecEvalHashedScalarArrayOp, ExecEvalSubPlan, + ExecEvalRPRNavSet, + ExecEvalRPRNavRestore, ExecEvalSysVar, ExecEvalWholeRowVar, ExecEvalXmlExpr, diff --git a/src/backend/utils/adt/windowfuncs.c b/src/backend/utils/adt/windowfuncs.c index 78b7f05aba2..46e7a03a666 100644 --- a/src/backend/utils/adt/windowfuncs.c +++ b/src/backend/utils/adt/windowfuncs.c @@ -41,7 +41,6 @@ static bool rank_up(WindowObject winobj); static Datum leadlag_common(FunctionCallInfo fcinfo, bool forward, bool withoffset, bool withdefault); - /* * utility routine for *_rank functions. */ @@ -724,3 +723,121 @@ window_nth_value(PG_FUNCTION_ARGS) PG_RETURN_DATUM(result); } + +/* + * prev + * Catalog placeholder for RPR's PREV navigation operator. + * + * The parser transforms prev() calls inside DEFINE into RPRNavExpr nodes, + * so this function is never reached during normal RPR execution. It exists + * only so that the parser can resolve the function name from pg_proc. + * Calls outside DEFINE are rejected by parse_func.c (EXPR_KIND_RPR_DEFINE + * check). The error below is a defensive measure in case that check is + * bypassed (e.g., direct C-level function invocation). + */ +Datum +window_prev(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use PREV() outside a DEFINE clause"))); + PG_RETURN_NULL(); /* not reached */ +} + +/* + * next + * Catalog placeholder for RPR's NEXT navigation operator. + * See window_prev() for details. + */ +Datum +window_next(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use NEXT() outside a DEFINE clause"))); + PG_RETURN_NULL(); /* not reached */ +} + +/* + * prev(value, offset) + * Catalog placeholder for RPR's PREV navigation operator with offset. + * See window_prev() for details. + */ +Datum +window_prev_offset(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use PREV() outside a DEFINE clause"))); + PG_RETURN_NULL(); /* not reached */ +} + +/* + * next(value, offset) + * Catalog placeholder for RPR's NEXT navigation operator with offset. + * See window_prev() for details. + */ +Datum +window_next_offset(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use NEXT() outside a DEFINE clause"))); + PG_RETURN_NULL(); /* not reached */ +} + +/* + * first + * Catalog placeholder for RPR's FIRST navigation operator. + * See window_prev() for details. + */ +Datum +window_first(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use FIRST() outside a DEFINE clause"))); + PG_RETURN_NULL(); /* not reached */ +} + +/* + * last + * Catalog placeholder for RPR's LAST navigation operator. + * See window_prev() for details. + */ +Datum +window_last(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use LAST() outside a DEFINE clause"))); + PG_RETURN_NULL(); /* not reached */ +} + +/* + * first(value, offset) + * Catalog placeholder for RPR's FIRST navigation operator with offset. + * See window_prev() for details. + */ +Datum +window_first_offset(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use FIRST() outside a DEFINE clause"))); + PG_RETURN_NULL(); /* not reached */ +} + +/* + * last(value, offset) + * Catalog placeholder for RPR's LAST navigation operator with offset. + * See window_prev() for details. + */ +Datum +window_last_offset(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use LAST() outside a DEFINE clause"))); + PG_RETURN_NULL(); /* not reached */ +} diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index fa9ae79082b..02b668fc4c5 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -11017,6 +11017,30 @@ { oid => '3114', descr => 'fetch the Nth row value', proname => 'nth_value', prokind => 'w', prorettype => 'anyelement', proargtypes => 'anyelement int4', prosrc => 'window_nth_value' }, +{ oid => '8126', descr => 'fetch the preceding row value', + proname => 'prev', provolatile => 's', prorettype => 'anyelement', + proargtypes => 'anyelement', prosrc => 'window_prev' }, +{ oid => '8128', descr => 'fetch the Nth preceding row value', + proname => 'prev', provolatile => 's', proisstrict => 'f', prorettype => 'anyelement', + proargtypes => 'anyelement int8', prosrc => 'window_prev_offset' }, +{ oid => '8127', descr => 'fetch the following row value', + proname => 'next', provolatile => 's', prorettype => 'anyelement', + proargtypes => 'anyelement', prosrc => 'window_next' }, +{ oid => '8129', descr => 'fetch the Nth following row value', + proname => 'next', provolatile => 's', proisstrict => 'f', prorettype => 'anyelement', + proargtypes => 'anyelement int8', prosrc => 'window_next_offset' }, +{ oid => '8130', descr => 'fetch the first row value within match', + proname => 'first', provolatile => 's', prorettype => 'anyelement', + proargtypes => 'anyelement', prosrc => 'window_first' }, +{ oid => '8132', descr => 'fetch the Nth row value within match', + proname => 'first', provolatile => 's', proisstrict => 'f', prorettype => 'anyelement', + proargtypes => 'anyelement int8', prosrc => 'window_first_offset' }, +{ oid => '8131', descr => 'fetch the last row value within match', + proname => 'last', provolatile => 's', prorettype => 'anyelement', + proargtypes => 'anyelement', prosrc => 'window_last' }, +{ oid => '8133', descr => 'fetch the Nth-from-last row value within match', + proname => 'last', provolatile => 's', proisstrict => 'f', prorettype => 'anyelement', + proargtypes => 'anyelement int8', prosrc => 'window_last_offset' }, # functions for range types { oid => '3832', descr => 'I/O', diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h index c61b3d624d5..db66ebe313c 100644 --- a/src/include/executor/execExpr.h +++ b/src/include/executor/execExpr.h @@ -274,6 +274,10 @@ typedef enum ExprEvalOp EEOP_MERGE_SUPPORT_FUNC, EEOP_SUBPLAN, + /* row pattern navigation (RPR PREV/NEXT) */ + EEOP_RPR_NAV_SET, + EEOP_RPR_NAV_RESTORE, + /* aggregation related nodes */ EEOP_AGG_STRICT_DESERIALIZE, EEOP_AGG_DESERIALIZE, @@ -695,6 +699,18 @@ typedef struct ExprEvalStep SubPlanState *sstate; } subplan; + /* for EEOP_RPR_NAV_SET / EEOP_RPR_NAV_RESTORE */ + struct + { + WindowAggState *winstate; + RPRNavKind kind; /* navigation kind (simple or compound) */ + Datum *offset_value; /* offset value(s), or NULL */ + bool *offset_isnull; /* offset null flag(s) */ + /* For compound nav: offset_value[0] = inner, [1] = outer */ + int16 resulttyplen; /* RESTORE: result type length */ + bool resulttypbyval; /* RESTORE: result pass-by-value? */ + } rpr_nav; + /* for EEOP_AGG_*DESERIALIZE */ struct { @@ -902,6 +918,10 @@ extern void ExecEvalMergeSupportFunc(ExprState *state, ExprEvalStep *op, ExprContext *econtext); extern void ExecEvalSubPlan(ExprState *state, ExprEvalStep *op, ExprContext *econtext); +extern void ExecEvalRPRNavSet(ExprState *state, ExprEvalStep *op, + ExprContext *econtext); +extern void ExecEvalRPRNavRestore(ExprState *state, ExprEvalStep *op, + ExprContext *econtext); extern void ExecEvalWholeRowVar(ExprState *state, ExprEvalStep *op, ExprContext *econtext); extern void ExecEvalSysVar(ExprState *state, ExprEvalStep *op, diff --git a/src/include/executor/execRPR.h b/src/include/executor/execRPR.h new file mode 100644 index 00000000000..7b2b0febb76 --- /dev/null +++ b/src/include/executor/execRPR.h @@ -0,0 +1,40 @@ +/*------------------------------------------------------------------------- + * + * execRPR.h + * prototypes for execRPR.c (NFA-based Row Pattern Recognition engine) + * + * + * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/executor/execRPR.h + * + *------------------------------------------------------------------------- + */ +#ifndef EXECRPR_H +#define EXECRPR_H + +#include "nodes/execnodes.h" +#include "windowapi.h" + +/* NFA context management */ +extern RPRNFAContext *ExecRPRStartContext(WindowAggState *winstate, + int64 startPos); +extern RPRNFAContext *ExecRPRGetHeadContext(WindowAggState *winstate, + int64 pos); +extern void ExecRPRFreeContext(WindowAggState *winstate, RPRNFAContext *ctx); + +/* NFA processing */ +extern void ExecRPRProcessRow(WindowAggState *winstate, int64 currentPos, + bool hasLimitedFrame, int64 frameOffset); +extern void ExecRPRCleanupDeadContexts(WindowAggState *winstate, + RPRNFAContext *excludeCtx); +extern void ExecRPRFinalizeAllContexts(WindowAggState *winstate, int64 lastPos); + +/* NFA statistics */ +extern void ExecRPRRecordContextSuccess(WindowAggState *winstate, + int64 matchLen); +extern void ExecRPRRecordContextFailure(WindowAggState *winstate, + int64 failedLen); + +#endif /* EXECRPR_H */ diff --git a/src/include/executor/nodeWindowAgg.h b/src/include/executor/nodeWindowAgg.h index ada4a1c458c..f6f6645131c 100644 --- a/src/include/executor/nodeWindowAgg.h +++ b/src/include/executor/nodeWindowAgg.h @@ -20,4 +20,7 @@ extern WindowAggState *ExecInitWindowAgg(WindowAgg *node, EState *estate, int ef extern void ExecEndWindowAgg(WindowAggState *node); extern void ExecReScanWindowAgg(WindowAggState *node); +/* RPR navigation support for expression evaluation opcodes */ +extern TupleTableSlot *ExecRPRNavGetSlot(WindowAggState *winstate, int64 pos); + #endif /* NODEWINDOWAGG_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 13359180d25..0cb01baa949 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -38,6 +38,9 @@ #include "nodes/plannodes.h" #include "partitioning/partdefs.h" #include "storage/buf.h" +#include "storage/condition_variable.h" +#include "utils/hsearch.h" +#include "utils/queryenvironment.h" #include "utils/reltrigger.h" #include "utils/typcache.h" @@ -2524,6 +2527,71 @@ typedef enum WindowAggStatus * tuples during spool */ } WindowAggStatus; +/* RPR reduced frame states returned by get_reduced_frame_status() */ +#define RF_NOT_DETERMINED 0 /* not yet processed */ +#define RF_FRAME_HEAD 1 /* start row of a match */ +#define RF_SKIPPED 2 /* interior row of a match */ +#define RF_UNMATCHED 3 /* no match at this row */ +#define RF_EMPTY_MATCH 4 /* empty match (0 rows); treated as unmatched */ + +/* + * RPRNFAState - single NFA state for pattern matching + * + * counts[] tracks repetition counts at each nesting depth. + * + * isAbsorbable tracks if state is in absorbable region (ABSORBABLE_BRANCH). + * Monotonic property: once false, stays false (can't re-enter region). + */ +typedef struct RPRNFAState +{ + struct RPRNFAState *next; /* next state in linked list */ + int16 elemIdx; /* current pattern element index */ + bool isAbsorbable; /* true if state is in absorbable region */ + int32 counts[FLEXIBLE_ARRAY_MEMBER]; /* repetition counts by depth */ +} RPRNFAState; + +/* + * RPRNFAContext - context for NFA pattern matching execution + * + * Two-flag absorption design: + * hasAbsorbableState: can this context absorb others? (>=1 absorbable state) + * - Monotonic: true->false only, cannot recover once false + * - Used to skip absorption attempts once all absorbable states are gone + * allStatesAbsorbable: can this context be absorbed? (ALL states absorbable) + * - Dynamic: can change false->true (when non-absorbable states die) + * - Used to determine if this context is eligible for absorption + */ +typedef struct RPRNFAContext +{ + struct RPRNFAContext *next; /* next context in linked list */ + struct RPRNFAContext *prev; /* previous context (for reverse traversal) */ + RPRNFAState *states; /* active states (linked list) */ + + int64 matchStartRow; /* row where match started */ + int64 matchEndRow; /* row where match ended (-1 = no match) */ + int64 lastProcessedRow; /* last row processed (for fail depth) */ + RPRNFAState *matchedState; /* FIN state for greedy fallback (cloned) */ + + /* Two-flag absorption optimization */ + bool hasAbsorbableState; /* can absorb others (>=1 absorbable + * state) */ + bool allStatesAbsorbable; /* can be absorbed (ALL states + * absorbable) */ +} RPRNFAContext; + +/* + * NFALengthStats + * + * Statistics for length measurements (min/max/total) used for computing + * average lengths in EXPLAIN ANALYZE output. + */ +typedef struct NFALengthStats +{ + int64 min; /* minimum length */ + int64 max; /* maximum length */ + int64 total; /* total length (for computing average) */ +} NFALengthStats; + typedef struct WindowAggState { ScanState ss; /* its first field is NodeTag */ @@ -2583,6 +2651,49 @@ typedef struct WindowAggState int64 groupheadpos; /* current row's peer group head position */ int64 grouptailpos; /* " " " " tail position (group end+1) */ + /* these fields are used in Row pattern recognition: */ + RPSkipTo rpSkipTo; /* Row Pattern Skip To type */ + struct RPRPattern *rpPattern; /* compiled pattern for NFA execution */ + List *defineVariableList; /* list of row pattern definition + * variables (list of String) */ + List *defineClauseList; /* expression for row pattern definition + * search conditions ExprState list */ + RPRNFAContext *nfaContext; /* active matching contexts (head) */ + RPRNFAContext *nfaContextTail; /* tail of active contexts (for reverse + * traversal) */ + RPRNFAContext *nfaContextFree; /* recycled NFA context nodes */ + RPRNFAState *nfaStateFree; /* recycled NFA state nodes */ + Size nfaStateSize; /* pre-calculated RPRNFAState size */ + bool *nfaVarMatched; /* per-row cache: varMatched[varId] for varId + * < numDefines */ + Bitmapset *defineMatchStartDependent; /* DEFINE vars needing per-context + * evaluation + * (match_start-dependent) */ + bitmapword *nfaVisitedElems; /* elemIdx visited bitmap for cycle + * detection */ + int nfaVisitedNWords; /* number of bitmapwords in + * nfaVisitedElems */ + int64 nfaLastProcessedRow; /* last row processed by NFA (-1 = + * none) */ + + /* NFA statistics for EXPLAIN ANALYZE */ + int64 nfaStatesActive; /* current active states (internal) */ + int64 nfaStatesMax; /* peak active states */ + int64 nfaStatesTotalCreated; /* total states allocated */ + int64 nfaStatesMerged; /* states merged (deduplicated) */ + int64 nfaContextsActive; /* current active contexts (internal) */ + int64 nfaContextsMax; /* peak active contexts */ + int64 nfaContextsTotalCreated; /* total contexts allocated */ + int64 nfaContextsAbsorbed; /* contexts absorbed (optimization) */ + int64 nfaContextsSkipped; /* contexts skipped (SKIP PAST LAST ROW) */ + int64 nfaContextsPruned; /* contexts pruned on first row */ + int64 nfaMatchesSucceeded; /* successful pattern matches */ + int64 nfaMatchesFailed; /* failed pattern matches */ + NFALengthStats nfaMatchLen; /* successful match length stats */ + NFALengthStats nfaFailLen; /* mismatch length stats */ + NFALengthStats nfaAbsorbedLen; /* absorbed context length stats */ + NFALengthStats nfaSkippedLen; /* skipped context length stats */ + MemoryContext partcontext; /* context for partition-lifespan data */ MemoryContext aggcontext; /* shared context for aggregate working data */ MemoryContext curaggcontext; /* current aggregate's working data */ @@ -2610,6 +2721,25 @@ typedef struct WindowAggState TupleTableSlot *agg_row_slot; TupleTableSlot *temp_slot_1; TupleTableSlot *temp_slot_2; + + /* RPR navigation */ + RPRNavOffsetKind navMaxOffsetKind; /* status of navMaxOffset */ + int64 navMaxOffset; /* max backward nav offset (when FIXED) */ + bool hasFirstNav; /* FIRST() present in DEFINE */ + RPRNavOffsetKind navFirstOffsetKind; /* status of navFirstOffset */ + int64 navFirstOffset; /* min FIRST() offset (when FIXED) */ + struct WindowObjectData *nav_winobj; /* winobj for RPR nav fetch */ + int64 nav_slot_pos; /* position cached in nav_slot, or -1 */ + TupleTableSlot *nav_slot; /* slot for PREV/NEXT/FIRST/LAST target row */ + TupleTableSlot *nav_saved_outertuple; /* saved slot during nav swap */ + TupleTableSlot *nav_null_slot; /* all NULL slot */ + int64 nav_match_start; /* match_start for FIRST/LAST nav */ + + /* RPR current match result */ + bool rpr_match_valid; /* true if a match result is set */ + bool rpr_match_matched; /* true if the result was a match */ + int64 rpr_match_start; /* start position of the match result */ + int64 rpr_match_length; /* number of rows matched (0 = empty) */ } WindowAggState; /* ---------------- -- 2.43.0