From 1a9f209676a9f2095bce03411380296b59729b10 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii Date: Sat, 2 May 2026 13:40:29 +0900 Subject: [PATCH v47 4/9] Row pattern recognition patch (planner). --- src/backend/nodes/nodeFuncs.c | 42 + src/backend/optimizer/path/allpaths.c | 79 + src/backend/optimizer/path/costsize.c | 29 + src/backend/optimizer/plan/Makefile | 1 + src/backend/optimizer/plan/createplan.c | 417 ++++- src/backend/optimizer/plan/meson.build | 1 + src/backend/optimizer/plan/planner.c | 17 +- src/backend/optimizer/plan/rpr.c | 1993 +++++++++++++++++++++ src/backend/optimizer/plan/setrefs.c | 29 +- src/backend/optimizer/prep/prepjointree.c | 9 + src/include/nodes/plannodes.h | 103 ++ src/include/optimizer/rpr.h | 65 + 12 files changed, 2781 insertions(+), 4 deletions(-) create mode 100644 src/backend/optimizer/plan/rpr.c create mode 100644 src/include/optimizer/rpr.h diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index f968ac68314..651d049cfa2 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -69,6 +69,9 @@ exprType(const Node *expr) case T_MergeSupportFunc: type = ((const MergeSupportFunc *) expr)->msftype; break; + case T_RPRNavExpr: + type = ((const RPRNavExpr *) expr)->resulttype; + break; case T_SubscriptingRef: type = ((const SubscriptingRef *) expr)->refrestype; break; @@ -853,6 +856,9 @@ exprCollation(const Node *expr) case T_MergeSupportFunc: coll = ((const MergeSupportFunc *) expr)->msfcollid; break; + case T_RPRNavExpr: + coll = ((const RPRNavExpr *) expr)->resultcollid; + break; case T_SubscriptingRef: coll = ((const SubscriptingRef *) expr)->refcollid; break; @@ -1162,6 +1168,9 @@ exprSetCollation(Node *expr, Oid collation) case T_MergeSupportFunc: ((MergeSupportFunc *) expr)->msfcollid = collation; break; + case T_RPRNavExpr: + ((RPRNavExpr *) expr)->resultcollid = collation; + break; case T_SubscriptingRef: ((SubscriptingRef *) expr)->refcollid = collation; break; @@ -1437,6 +1446,9 
@@ exprLocation(const Node *expr) case T_MergeSupportFunc: loc = ((const MergeSupportFunc *) expr)->location; break; + case T_RPRNavExpr: + loc = ((const RPRNavExpr *) expr)->location; + break; case T_SubscriptingRef: /* just use container argument's location */ loc = exprLocation((Node *) ((const SubscriptingRef *) expr)->refexpr); @@ -2198,6 +2210,18 @@ expression_tree_walker_impl(Node *node, return true; } break; + case T_RPRNavExpr: + { + RPRNavExpr *expr = (RPRNavExpr *) node; + + if (WALK(expr->arg)) + return true; + if (expr->offset_arg && WALK(expr->offset_arg)) + return true; + if (expr->compound_offset_arg && WALK(expr->compound_offset_arg)) + return true; + } + break; case T_SubscriptingRef: { SubscriptingRef *sbsref = (SubscriptingRef *) node; @@ -2431,6 +2455,8 @@ expression_tree_walker_impl(Node *node, return true; if (WALK(wc->endOffset)) return true; + if (WALK(wc->defineClause)) + return true; } break; case T_CTECycleClause: @@ -2823,6 +2849,8 @@ query_tree_walker_impl(Query *query, return true; if (WALK(wc->endOffset)) return true; + if (WALK(wc->defineClause)) + return true; } } @@ -3143,6 +3171,18 @@ expression_tree_mutator_impl(Node *node, return (Node *) newnode; } break; + case T_RPRNavExpr: + { + RPRNavExpr *nav = (RPRNavExpr *) node; + RPRNavExpr *newnode; + + FLATCOPY(newnode, nav, RPRNavExpr); + MUTATE(newnode->arg, nav->arg, Expr *); + MUTATE(newnode->offset_arg, nav->offset_arg, Expr *); + MUTATE(newnode->compound_offset_arg, nav->compound_offset_arg, Expr *); + return (Node *) newnode; + } + break; case T_SubscriptingRef: { SubscriptingRef *sbsref = (SubscriptingRef *) node; @@ -3565,6 +3605,7 @@ expression_tree_mutator_impl(Node *node, MUTATE(newnode->orderClause, wc->orderClause, List *); MUTATE(newnode->startOffset, wc->startOffset, Node *); MUTATE(newnode->endOffset, wc->endOffset, Node *); + MUTATE(newnode->defineClause, wc->defineClause, List *); return (Node *) newnode; } break; @@ -3914,6 +3955,7 @@ 
query_tree_mutator_impl(Query *query, FLATCOPY(newnode, wc, WindowClause); MUTATE(newnode->startOffset, wc->startOffset, Node *); MUTATE(newnode->endOffset, wc->endOffset, Node *); + MUTATE(newnode->defineClause, wc->defineClause, List *); resultlist = lappend(resultlist, (Node *) newnode); } diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 61093f222a1..470029e42e0 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -2463,6 +2463,17 @@ find_window_run_conditions(Query *subquery, AttrNumber attno, wclause = (WindowClause *) list_nth(subquery->windowClause, wfunc->winref - 1); + /* + * If a DEFINE clause exists, we cannot push down a run condition. In the + * case, a window partition (or frame) is divided into multiple reduced + * frames and each frame should be evaluated to the end of the partition + * (or full frame end). This means we cannot apply the run condition + * optimization because it stops evaluation window functions in certain + * cases. + */ + if (wclause->defineClause != NIL) + return false; + req.type = T_SupportRequestWFuncMonotonic; req.window_func = wfunc; req.window_clause = wclause; @@ -4739,6 +4750,74 @@ remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel, if (contain_volatile_functions(texpr)) continue; + /* + * If any RPR (Row Pattern Recognition) window clause references this + * column in its DEFINE clause, don't remove it. The DEFINE + * expression needs these columns in the tuplestore slot for pattern + * matching evaluation, even if the outer query doesn't reference + * them. 
+ */ + if (IsA(texpr, Var)) + { + Var *var = (Var *) texpr; + ListCell *wlc; + bool needed_by_define = false; + + foreach(wlc, subquery->windowClause) + { + WindowClause *wc = lfirst_node(WindowClause, wlc); + + if (wc->defineClause != NIL) + { + List *vars = pull_var_clause((Node *) wc->defineClause, 0); + ListCell *vlc; + + foreach(vlc, vars) + { + Var *dvar = (Var *) lfirst(vlc); + + if (dvar->varattno == var->varattno) + { + needed_by_define = true; + break; + } + } + list_free(vars); + if (needed_by_define) + break; + } + } + if (needed_by_define) + continue; + } + + /* + * If it's a window function referencing a window clause with RPR, + * don't remove it. Even when the window function result is unused by + * the outer query, the RPR pattern matching (frame reduction via + * DEFINE/PATTERN) must still execute. Replacing this with NULL would + * leave no active window functions for the WindowClause, causing the + * planner to omit the WindowAgg node entirely. + */ + if (IsA(texpr, WindowFunc)) + { + WindowFunc *wfunc = (WindowFunc *) texpr; + ListCell *wlc; + + foreach(wlc, subquery->windowClause) + { + WindowClause *wc = lfirst_node(WindowClause, wlc); + + if (wc->winref == wfunc->winref && + wc->defineClause != NIL) + { + break; + } + } + if (wlc != NULL) + continue; + } + /* * OK, we don't need it. Replace the expression with a NULL constant. 
* Preserve the exposed type of the expression, in case something diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 1c575e56ff6..b38cad9f121 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -104,6 +104,7 @@ #include "optimizer/placeholder.h" #include "optimizer/plancat.h" #include "optimizer/restrictinfo.h" +#include "optimizer/rpr.h" #include "parser/parsetree.h" #include "utils/lsyscache.h" #include "utils/selfuncs.h" @@ -3228,7 +3229,35 @@ cost_windowagg(Path *path, PlannerInfo *root, * many rows the window function will fetch, it's hard to do better. In * any case, it's a good estimate for all the built-in window functions, * so we'll just do this for now. + * + * Moreover, if row pattern recognition is used, we charge the DEFINE + * expressions once per tuple for each variable that appears in PATTERN. */ + if (winclause->rpPattern) + { + List *pattern_vars; + ListCell *lc2; + QualCost defcosts; + + pattern_vars = collectPatternVariables(winclause->rpPattern); + + foreach(lc2, pattern_vars) + { + char *ptname = strVal(lfirst(lc2)); + + foreach_node(TargetEntry, def, winclause->defineClause) + { + if (!strcmp(ptname, def->resname)) + { + cost_qual_eval_node(&defcosts, (Node *) def->expr, root); + startup_cost += defcosts.startup; + total_cost += defcosts.per_tuple * input_tuples; + } + } + } + list_free_deep(pattern_vars); + } + foreach(lc, windowFuncs) { WindowFunc *wfunc = lfirst_node(WindowFunc, lc); diff --git a/src/backend/optimizer/plan/Makefile b/src/backend/optimizer/plan/Makefile index 80ef162e484..7e0167789d8 100644 --- a/src/backend/optimizer/plan/Makefile +++ b/src/backend/optimizer/plan/Makefile @@ -19,6 +19,7 @@ OBJS = \ planagg.o \ planmain.o \ planner.o \ + rpr.o \ setrefs.o \ subselect.o diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index de6a183da79..52205cc7159 100644 --- 
a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -16,6 +16,7 @@ */ #include "postgres.h" +#include "common/int.h" #include "access/sysattr.h" #include "access/transam.h" #include "catalog/pg_class.h" @@ -36,6 +37,7 @@ #include "optimizer/prep.h" #include "optimizer/restrictinfo.h" #include "optimizer/subselect.h" +#include "optimizer/rpr.h" #include "optimizer/tlist.h" #include "parser/parse_clause.h" #include "parser/parsetree.h" @@ -288,7 +290,11 @@ static Memoize *make_memoize(Plan *lefttree, Oid *hashoperators, static WindowAgg *make_windowagg(List *tlist, WindowClause *wc, int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, - List *runCondition, List *qual, bool topWindow, + List *runCondition, RPSkipTo rpSkipTo, + RPRPattern *compiledPattern, + List *defineClause, + Bitmapset *defineMatchStartDependent, + List *qual, bool topWindow, Plan *lefttree); static Group *make_group(List *tlist, List *qual, int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, @@ -2457,6 +2463,363 @@ create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path) return plan; } +/* + * NavOffsetContext - context for compute_nav_offsets walker. + * + * Collects both backward reach (PREV, LAST-with-offset, compound + * PREV_LAST/NEXT_LAST) and forward-from-match-start reach (FIRST, + * compound PREV_FIRST/NEXT_FIRST) in a single tree walk. 
+ */ +typedef struct NavOffsetContext +{ + int64 maxOffset; /* max PREV/LAST backward offset (>= 0) */ + bool maxNeedsEval; /* non-constant PREV/LAST offset found */ + bool maxOverflow; /* constant offset overflow detected */ + int64 firstOffset; /* min FIRST offset (may be negative for + * PREV_FIRST) */ + bool hasFirst; /* any FIRST node found */ + bool firstNeedsEval; /* non-constant FIRST offset found */ +} NavOffsetContext; + +/* + * Helper: extract constant offset from an expression, handling NULL/negative. + * If expr is NULL, returns defaultOffset. + * Returns true if constant, false if non-constant (Param, cast, etc.). + */ +static bool +extract_const_offset(Expr *expr, int64 defaultOffset, int64 *result) +{ + if (expr == NULL) + { + *result = defaultOffset; + return true; + } + + if (IsA(expr, Const)) + { + Const *c = (Const *) expr; + + if (c->constisnull) + *result = 0; /* runtime error; safe placeholder */ + else + { + *result = DatumGetInt64(c->constvalue); + if (*result < 0) + *result = 0; /* runtime error; safe placeholder */ + } + return true; + } + + return false; /* non-constant */ +} + +/* + * nav_offset_walker + * Expression tree walker for compute_nav_offsets. + * + * For each RPRNavExpr found, extract its constant offset(s) and update the + * NavOffsetContext with the maximum backward reach (maxOffset) and minimum + * forward reach (firstOffset). Handles simple navigation (PREV, NEXT, + * FIRST, LAST) and compound forms (PREV_FIRST, NEXT_FIRST, PREV_LAST, + * NEXT_LAST) by combining inner and outer offsets. + * + * Non-constant offsets set maxNeedsEval or firstNeedsEval. Overflow sets + * maxOverflow or firstOverflow for RETAIN_ALL fallback. 
+ */ +static bool +nav_offset_walker(Node *node, void *ctx) +{ + NavOffsetContext *context = (NavOffsetContext *) ctx; + + if (node == NULL) + return false; + + if (IsA(node, RPRNavExpr)) + { + RPRNavExpr *nav = (RPRNavExpr *) node; + + /* + * Simple PREV(v, N) and LAST(v, N): backward reach from currentpos. + * LAST without offset = currentpos, no backward reach. NEXT: forward + * only, irrelevant for trim. + */ + if (nav->kind == RPR_NAV_PREV || + (nav->kind == RPR_NAV_LAST && nav->offset_arg != NULL)) + { + if (!context->maxNeedsEval) + { + int64 offset; + + if (extract_const_offset(nav->offset_arg, 1, &offset)) + context->maxOffset = Max(context->maxOffset, offset); + else + context->maxNeedsEval = true; + } + } + + /* + * Simple FIRST(v, N): forward reach from match_start. Smaller N means + * older rows needed. + */ + if (nav->kind == RPR_NAV_FIRST) + { + context->hasFirst = true; + + if (!context->firstNeedsEval) + { + int64 offset; + + if (extract_const_offset(nav->offset_arg, 0, &offset)) + context->firstOffset = Min(context->firstOffset, offset); + else + context->firstNeedsEval = true; + } + } + + /* + * Compound PREV_LAST / NEXT_LAST: base = currentpos. PREV_LAST(v, N, + * M): target = currentpos - N - M -> lookback = N + M NEXT_LAST(v, N, + * M): target = currentpos - N + M -> lookback = max(N - M, 0) + */ + if (nav->kind == RPR_NAV_PREV_LAST || + nav->kind == RPR_NAV_NEXT_LAST) + { + if (!context->maxNeedsEval) + { + int64 inner, + outer, + combined; + + if (extract_const_offset(nav->offset_arg, 0, &inner) && + extract_const_offset(nav->compound_offset_arg, 1, &outer)) + { + if (nav->kind == RPR_NAV_PREV_LAST) + { + if (pg_add_s64_overflow(inner, outer, &combined)) + { + context->maxOverflow = true; + return false; + } + } + else + combined = (inner > outer) ? inner - outer : 0; + + context->maxOffset = Max(context->maxOffset, combined); + } + else + context->maxNeedsEval = true; + } + } + + /* + * Compound PREV_FIRST / NEXT_FIRST: base = match_start. 
PREV_FIRST(v, + * N, M): target = match_start + N - M NEXT_FIRST(v, N, M): target = + * match_start + N + M The combined offset (N+/-M) from match_start + * can be negative, meaning rows before match_start are needed. + */ + if (nav->kind == RPR_NAV_PREV_FIRST || + nav->kind == RPR_NAV_NEXT_FIRST) + { + context->hasFirst = true; + + if (!context->firstNeedsEval) + { + int64 inner, + outer, + combined; + + if (extract_const_offset(nav->offset_arg, 0, &inner) && + extract_const_offset(nav->compound_offset_arg, 1, &outer)) + { + if (nav->kind == RPR_NAV_PREV_FIRST) + { + /* + * combined = inner - outer. Both are non-negative, + * so the result >= -INT64_MAX, which cannot underflow + * int64. No overflow check needed. + */ + combined = inner - outer; + } + else + { + /* + * NEXT_FIRST: combined = inner + outer. This can + * overflow, but the result is always >= 0, so it + * never updates firstOffset (which tracks the + * minimum). Clamp to INT64_MAX on overflow. + */ + if (pg_add_s64_overflow(inner, outer, &combined)) + combined = INT64_MAX; + } + + context->firstOffset = Min(context->firstOffset, combined); + } + else + context->firstNeedsEval = true; + } + } + + /* Don't walk into RPRNavExpr children */ + return false; + } + + return expression_tree_walker(node, nav_offset_walker, ctx); +} + +/* + * compute_nav_offsets + * Compute navigation offsets for tuplestore trim in a single pass. 
+ * + * Walks all DEFINE clause expressions once, computing: + * - maxOffset: max backward reach from PREV, LAST-with-offset, + * compound PREV_LAST/NEXT_LAST + * - hasFirst/firstOffset: min forward-from-match-start reach from + * FIRST, compound PREV_FIRST/NEXT_FIRST + */ +static void +compute_nav_offsets(List *defineClause, + RPRNavOffsetKind *maxKind, int64 *maxResult, + bool *hasFirst, + RPRNavOffsetKind *firstKind, int64 *firstResult) +{ + NavOffsetContext ctx; + ListCell *lc; + + ctx.maxOffset = 0; + ctx.maxNeedsEval = false; + ctx.maxOverflow = false; + ctx.firstOffset = INT64_MAX; /* sentinel: no FIRST found yet */ + ctx.hasFirst = false; + ctx.firstNeedsEval = false; + + foreach(lc, defineClause) + { + TargetEntry *te = (TargetEntry *) lfirst(lc); + + nav_offset_walker((Node *) te->expr, &ctx); + } + + /* Max backward offset */ + if (ctx.maxOverflow) + { + *maxKind = RPR_NAV_OFFSET_RETAIN_ALL; + *maxResult = 0; + } + else if (ctx.maxNeedsEval) + { + *maxKind = RPR_NAV_OFFSET_NEEDS_EVAL; + *maxResult = 0; + } + else + { + *maxKind = RPR_NAV_OFFSET_FIXED; + *maxResult = ctx.maxOffset; + } + + /* First offset (can be negative for compound PREV_FIRST) */ + *hasFirst = ctx.hasFirst; + if (ctx.hasFirst) + { + if (ctx.firstNeedsEval) + { + *firstKind = RPR_NAV_OFFSET_NEEDS_EVAL; + *firstResult = 0; + } + else if (ctx.firstOffset == INT64_MAX) + { + *firstKind = RPR_NAV_OFFSET_FIXED; + *firstResult = 0; /* only implicit FIRST(v) */ + } + else + { + *firstKind = RPR_NAV_OFFSET_FIXED; + *firstResult = ctx.firstOffset; /* may be negative */ + } + } + else + { + *firstKind = RPR_NAV_OFFSET_FIXED; + *firstResult = 0; + } +} + +/* + * has_match_start_dependency + * Check if an expression tree contains navigation that depends on + * match_start: FIRST, LAST-with-offset, or compound PREV_FIRST/ + * NEXT_FIRST/PREV_LAST/NEXT_LAST with offset. Such expressions + * require per-context re-evaluation during NFA processing. 
+ * + * LAST without offset always resolves to currentpos and is + * match_start-independent. + */ +static bool +has_match_start_dependency(Node *node, void *context) +{ + if (node == NULL) + return false; + + if (IsA(node, RPRNavExpr)) + { + RPRNavExpr *nav = (RPRNavExpr *) node; + + if (nav->kind == RPR_NAV_FIRST) + return true; + if (nav->kind == RPR_NAV_LAST && nav->offset_arg != NULL) + return true; + + /* Compound kinds with FIRST base depend on match_start */ + if (nav->kind == RPR_NAV_PREV_FIRST || + nav->kind == RPR_NAV_NEXT_FIRST) + return true; + + /* + * PREV_LAST/NEXT_LAST: inner is LAST, which uses currentpos. + * match_start-dependent only if inner has offset (clamped to + * match_start). + */ + if ((nav->kind == RPR_NAV_PREV_LAST || + nav->kind == RPR_NAV_NEXT_LAST) && + nav->offset_arg != NULL) + return true; + + /* Check children (arg may contain further nav expressions) */ + return has_match_start_dependency((Node *) nav->arg, context); + } + + return expression_tree_walker(node, has_match_start_dependency, NULL); +} + +/* + * compute_match_start_dependent + * Build a Bitmapset of DEFINE variable indices whose expressions + * depend on match_start (contain FIRST, LAST-with-offset, or + * compound PREV_FIRST/NEXT_FIRST/PREV_LAST/NEXT_LAST with offset). + * + * Variables in this set require per-context re-evaluation during NFA + * processing, because different contexts may have different match_start + * values. 
+ */ +static Bitmapset * +compute_match_start_dependent(List *defineClause) +{ + Bitmapset *result = NULL; + ListCell *lc; + int varIdx = 0; + + foreach(lc, defineClause) + { + TargetEntry *te = (TargetEntry *) lfirst(lc); + + if (has_match_start_dependency((Node *) te->expr, NULL)) + result = bms_add_member(result, varIdx); + + varIdx++; + } + + return result; +} + /* * create_windowagg_plan * @@ -2481,6 +2844,11 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) Oid *ordOperators; Oid *ordCollations; ListCell *lc; + List *defineVariableList = NIL; + List *filteredDefineClause = NIL; + RPRPattern *compiledPattern = NULL; + Bitmapset *matchStartDependent = NULL; + /* * Choice of tlist here is motivated by the fact that WindowAgg will be @@ -2531,6 +2899,28 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) ordNumCols++; } + /* Build RPR pattern and defineVariableList */ + if (wc->rpPattern) + { + /* + * Build defineVariableList from defineClause. The parser already + * rejects DEFINE variables not used in PATTERN, so no filtering is + * needed. 
+ */ + buildDefineVariableList(wc->defineClause, &defineVariableList); + filteredDefineClause = wc->defineClause; + + /* Identify match_start-dependent DEFINE variables */ + matchStartDependent = compute_match_start_dependent(wc->defineClause); + + /* Compile and optimize RPR patterns */ + compiledPattern = buildRPRPattern(wc->rpPattern, + defineVariableList, + wc->rpSkipTo, + wc->frameOptions, + !bms_is_empty(matchStartDependent)); + } + /* And finally we can make the WindowAgg node */ plan = make_windowagg(tlist, wc, @@ -2543,6 +2933,10 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) ordOperators, ordCollations, best_path->runCondition, + wc->rpSkipTo, + compiledPattern, + filteredDefineClause, + matchStartDependent, best_path->qual, best_path->topwindow, subplan); @@ -6613,7 +7007,11 @@ static WindowAgg * make_windowagg(List *tlist, WindowClause *wc, int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, - List *runCondition, List *qual, bool topWindow, Plan *lefttree) + List *runCondition, RPSkipTo rpSkipTo, + RPRPattern *compiledPattern, + List *defineClause, + Bitmapset *defineMatchStartDependent, + List *qual, bool topWindow, Plan *lefttree) { WindowAgg *node = makeNode(WindowAgg); Plan *plan = &node->plan; @@ -6640,6 +7038,21 @@ make_windowagg(List *tlist, WindowClause *wc, node->inRangeAsc = wc->inRangeAsc; node->inRangeNullsFirst = wc->inRangeNullsFirst; node->topWindow = topWindow; + node->rpSkipTo = rpSkipTo; + + /* Store compiled pattern for NFA execution */ + node->rpPattern = compiledPattern; + + node->defineClause = defineClause; + + /* Store pre-computed match_start dependency bitmapset */ + node->defineMatchStartDependent = defineMatchStartDependent; + + /* Compute nav offsets for tuplestore trim optimization */ + compute_nav_offsets(defineClause, + &node->navMaxOffsetKind, &node->navMaxOffset, + &node->hasFirstNav, + 
&node->navFirstOffsetKind, &node->navFirstOffset); plan->targetlist = tlist; plan->lefttree = lefttree; diff --git a/src/backend/optimizer/plan/meson.build b/src/backend/optimizer/plan/meson.build index c565b2adbcc..5b2381cd774 100644 --- a/src/backend/optimizer/plan/meson.build +++ b/src/backend/optimizer/plan/meson.build @@ -7,6 +7,7 @@ backend_sources += files( 'planagg.c', 'planmain.c', 'planner.c', + 'rpr.c', 'setrefs.c', 'subselect.c', ) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 933dcbf5004..703c6ab7432 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1053,6 +1053,9 @@ subquery_planner(PlannerGlobal *glob, Query *parse, char *plan_name, EXPRKIND_LIMIT); wc->endOffset = preprocess_expression(root, wc->endOffset, EXPRKIND_LIMIT); + wc->defineClause = (List *) preprocess_expression(root, + (Node *) wc->defineClause, + EXPRKIND_TARGET); } parse->limitOffset = preprocess_expression(root, parse->limitOffset, @@ -6112,6 +6115,14 @@ optimize_window_clauses(PlannerInfo *root, WindowFuncLists *wflists) if (wflists->windowFuncs[wc->winref] == NIL) continue; + /* + * If a DEFINE clause exists, do not let support functions replace the + * frame with a non-RPR-compatible one. RPR windows require ROWS + * BETWEEN CURRENT ROW AND ... 
+ */ + if (wc->defineClause != NIL) + continue; + foreach(lc2, wflists->windowFuncs[wc->winref]) { SupportRequestOptimizeWindowClause req; @@ -6195,7 +6206,11 @@ optimize_window_clauses(PlannerInfo *root, WindowFuncLists *wflists) equal(wc->orderClause, existing_wc->orderClause) && wc->frameOptions == existing_wc->frameOptions && equal(wc->startOffset, existing_wc->startOffset) && - equal(wc->endOffset, existing_wc->endOffset)) + equal(wc->endOffset, existing_wc->endOffset) && + wc->rpSkipTo == existing_wc->rpSkipTo && + wc->initial == existing_wc->initial && + equal(wc->defineClause, existing_wc->defineClause) && + equal(wc->rpPattern, existing_wc->rpPattern)) { ListCell *lc4; diff --git a/src/backend/optimizer/plan/rpr.c b/src/backend/optimizer/plan/rpr.c new file mode 100644 index 00000000000..2543170c374 --- /dev/null +++ b/src/backend/optimizer/plan/rpr.c @@ -0,0 +1,1993 @@ +/*------------------------------------------------------------------------- + * + * rpr.c + * Row Pattern Recognition pattern compilation for planner + * + * This file contains functions for optimizing RPR pattern AST and + * compiling it to a flat element array for NFA execution by WindowAgg. + * + * Key components: + * 1. Pattern Optimization: Simplifies patterns before compilation + * (e.g., flatten nested SEQ/ALT, merge consecutive vars) + * 2. Pattern Compilation: Converts AST to flat element array for NFA + * 3. Absorption Analysis: Computes flags for O(n^2)->O(n) optimization + * + * Context Absorption Optimization: + * When a pattern starts with a greedy unbounded element (e.g., A+ or (A B)+), + * newer contexts cannot produce longer matches than older contexts. + * By absorbing (eliminating) redundant newer contexts, we reduce + * complexity from O(n^2) to O(n) for patterns like A+ B. 
+ * + * The absorption analysis uses two element flags: + * - RPR_ELEM_ABSORBABLE: marks WHERE to compare (judgment point) + * - RPR_ELEM_ABSORBABLE_BRANCH: marks the absorbable region + * + * See computeAbsorbability() and the detailed comments before + * isUnboundedStart() for the full design explanation. + * + * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/plan/rpr.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include + +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "optimizer/rpr.h" + +/* Forward declarations - pattern comparison */ +static bool rprPatternEqual(RPRPatternNode *a, RPRPatternNode *b); +static bool rprPatternChildrenEqual(List *a, List *b); + +/* Forward declarations - pattern optimization (shared) */ +static RPRPatternNode *tryUnwrapSingleChild(RPRPatternNode *pattern); + +/* Forward declarations - SEQ optimization */ +static List *flattenSeqChildren(List *children); +static List *mergeConsecutiveVars(List *children); +static List *mergeConsecutiveGroups(List *children); +static List *mergeConsecutiveAlts(List *children); +static List *mergeGroupPrefixSuffix(List *children); +static RPRPatternNode *optimizeSeqPattern(RPRPatternNode *pattern); + +/* Forward declarations - ALT optimization */ +static List *flattenAltChildren(List *children); +static List *removeDuplicateAlternatives(List *children); +static RPRPatternNode *optimizeAltPattern(RPRPatternNode *pattern); + +/* Forward declarations - GROUP optimization */ +static RPRPatternNode *tryMultiplyQuantifiers(RPRPatternNode *pattern); +static RPRPatternNode *tryUnwrapGroup(RPRPatternNode *pattern); +static RPRPatternNode *optimizeGroupPattern(RPRPatternNode *pattern); + +/* Forward declarations - optimization dispatcher */ +static RPRPatternNode 
*optimizeRPRPattern(RPRPatternNode *pattern); + +/* Forward declarations - pattern compilation */ +static int collectDefineVariables(List *defineVariableList, char **varNames); +static void scanRPRPatternRecursive(RPRPatternNode *node, char **varNames, + int *numVars, int *numElements, + RPRDepth depth, RPRDepth *maxDepth); +static void scanRPRPattern(RPRPatternNode *node, char **varNames, int *numVars, + int *numElements, RPRDepth *maxDepth); +static RPRPattern *allocateRPRPattern(int numVars, int numElements, + RPRDepth maxDepth, char **varNamesStack); +static RPRVarId getVarIdFromPattern(RPRPattern *pat, const char *varName); +static bool fillRPRPatternVar(RPRPatternNode *node, RPRPattern *pat, + int *idx, RPRDepth depth); +static bool fillRPRPatternGroup(RPRPatternNode *node, RPRPattern *pat, + int *idx, RPRDepth depth); +static bool fillRPRPatternAlt(RPRPatternNode *node, RPRPattern *pat, + int *idx, RPRDepth depth); +static bool fillRPRPattern(RPRPatternNode *node, RPRPattern *pat, + int *idx, RPRDepth depth); +static void finalizeRPRPattern(RPRPattern *result); + +/* Forward declarations - context absorption */ +static bool isFixedLengthChildren(RPRPattern *pattern, RPRElemIdx idx, + RPRDepth scopeDepth); +static bool isUnboundedStart(RPRPattern *pattern, RPRElemIdx idx); +static void computeAbsorbabilityRecursive(RPRPattern *pattern, + RPRElemIdx startIdx, + bool *hasAbsorbable); +static void computeAbsorbability(RPRPattern *pattern); + +/* + * rprPatternEqual + * Compare two RPRPatternNode trees for equality. + * + * Returns true if the trees are structurally identical. 
+ */ +static bool +rprPatternEqual(RPRPatternNode *a, RPRPatternNode *b) +{ + /* Pattern nodes in children lists must never be NULL */ + Assert(a != NULL && b != NULL); + + /* Must have same node type and quantifiers */ + if (a->nodeType != b->nodeType) + return false; + if (a->min != b->min || a->max != b->max) + return false; + if (a->reluctant != b->reluctant) + return false; + + switch (a->nodeType) + { + case RPR_PATTERN_VAR: + return strcmp(a->varName, b->varName) == 0; + + case RPR_PATTERN_SEQ: + case RPR_PATTERN_ALT: + case RPR_PATTERN_GROUP: + return rprPatternChildrenEqual(a->children, b->children); + } + + return false; /* keep compiler quiet */ +} + +/* + * rprPatternChildrenEqual + * Compare children lists of two pattern nodes for equality. + * + * Returns true if the children lists are structurally identical. + */ +static bool +rprPatternChildrenEqual(List *a, List *b) +{ + ListCell *lca, + *lcb; + + if (list_length(a) != list_length(b)) + return false; + + forboth(lca, a, lcb, b) + { + if (!rprPatternEqual((RPRPatternNode *) lfirst(lca), + (RPRPatternNode *) lfirst(lcb))) + return false; + } + + return true; +} + +/* + * tryUnwrapSingleChild + * Try to unwrap pattern node with single child. + * + * Examples (internal node representation): + * SEQ[A] -> A (single-element sequence becomes the element) + * ALT[A] -> A (single-alternative becomes the alternative) + * + * If pattern has exactly one child, return the child directly. + * Otherwise returns the pattern unchanged. + * Used by both SEQ and ALT optimization. + */ +static RPRPatternNode * +tryUnwrapSingleChild(RPRPatternNode *pattern) +{ + if (list_length(pattern->children) == 1) + return (RPRPatternNode *) linitial(pattern->children); + + return pattern; +} + +/* + * flattenSeqChildren + * Recursively optimize children and flatten nested SEQ. 
+ * + * Example: + * SEQ(A, SEQ(B, C)) -> SEQ(A, B, C) + * + * Returns a new list with optimized children, with nested SEQ children + * flattened into the parent list. + */ +static List * +flattenSeqChildren(List *children) +{ + ListCell *lc; + List *newChildren = NIL; + + foreach(lc, children) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + RPRPatternNode *opt = optimizeRPRPattern(child); + + /* GROUP{1,1} should have been unwrapped by optimizeGroupPattern */ + Assert(!(opt->nodeType == RPR_PATTERN_GROUP && + opt->min == 1 && opt->max == 1 && opt->reluctant == false)); + + if (opt->nodeType == RPR_PATTERN_SEQ) + { + newChildren = list_concat(newChildren, + list_copy(opt->children)); + } + else + { + newChildren = lappend(newChildren, opt); + } + } + + return newChildren; +} + +/* + * mergeConsecutiveVars + * Merge consecutive identical VAR nodes. + * + * Examples: + * A{m1,M1} A{m2,M2} -> A{m1+m2, M1+M2} where INF + x = INF. + * + * Only merges non-reluctant VAR nodes with the same variable name. + */ +static List * +mergeConsecutiveVars(List *children) +{ + ListCell *lc; + List *mergedChildren = NIL; + RPRPatternNode *prev = NULL; + + foreach(lc, children) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + + if (child->nodeType == RPR_PATTERN_VAR && child->reluctant == false) + { + /* ---------------------- + * Can merge consecutive VAR nodes if: + * 1. Same variable name + * 2. No min overflow: prev->min + child->min <= INF + * 3. No max overflow: prev->max + child->max <= INF (or either is INF) + */ + if (prev != NULL && + strcmp(prev->varName, child->varName) == 0 && + prev->min <= RPR_QUANTITY_INF - child->min && + (prev->max <= RPR_QUANTITY_INF - child->max || + prev->max == RPR_QUANTITY_INF || + child->max == RPR_QUANTITY_INF)) + { + /* + * Merge: accumulate min/max into prev. prev is guaranteed to + * be a non-reluctant VAR by the outer condition. 
+ */ + Assert(prev->nodeType == RPR_PATTERN_VAR && prev->reluctant == false); + + prev->min += child->min; + + if (prev->max == RPR_QUANTITY_INF || + child->max == RPR_QUANTITY_INF) + prev->max = RPR_QUANTITY_INF; + else + prev->max += child->max; + } + else + { + /* Flush previous and start new */ + if (prev != NULL) + mergedChildren = lappend(mergedChildren, prev); + prev = child; + } + } + else + { + /* Non-mergeable - flush previous */ + if (prev != NULL) + mergedChildren = lappend(mergedChildren, prev); + mergedChildren = lappend(mergedChildren, child); + prev = NULL; + } + } + + /* Flush remaining */ + if (prev != NULL) + mergedChildren = lappend(mergedChildren, prev); + + return mergedChildren; +} + +/* + * mergeConsecutiveGroups + * Merge consecutive identical GROUP nodes. + * + * Example: + * (A B)+ (A B)+ -> (A B){2,} + * + * Only merges non-reluctant GROUP nodes with identical children. + */ +static List * +mergeConsecutiveGroups(List *children) +{ + ListCell *lc; + List *mergedChildren = NIL; + RPRPatternNode *prev = NULL; + + foreach(lc, children) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + + if (child->nodeType == RPR_PATTERN_GROUP && child->reluctant == false) + { + /* ---------------------- + * Can merge consecutive GROUP nodes if: + * 1. Identical children + * 2. No min overflow: prev->min + child->min <= INF + * 3. No max overflow: prev->max + child->max <= INF (or either is INF) + */ + if (prev != NULL && + rprPatternChildrenEqual(prev->children, child->children) && + prev->min <= RPR_QUANTITY_INF - child->min && + (prev->max <= RPR_QUANTITY_INF - child->max || + prev->max == RPR_QUANTITY_INF || + child->max == RPR_QUANTITY_INF)) + { + /* + * Merge: accumulate min/max into prev. prev is guaranteed to + * be a non-reluctant GROUP by the outer condition. 
+ */ + Assert(prev->nodeType == RPR_PATTERN_GROUP && prev->reluctant == false); + + prev->min += child->min; + + if (prev->max == RPR_QUANTITY_INF || + child->max == RPR_QUANTITY_INF) + prev->max = RPR_QUANTITY_INF; + else + prev->max += child->max; + } + else + { + /* Flush previous and start new */ + if (prev != NULL) + mergedChildren = lappend(mergedChildren, prev); + prev = child; + } + } + else + { + /* Non-mergeable - flush previous */ + if (prev != NULL) + mergedChildren = lappend(mergedChildren, prev); + mergedChildren = lappend(mergedChildren, child); + prev = NULL; + } + } + + /* Flush remaining */ + if (prev != NULL) + mergedChildren = lappend(mergedChildren, prev); + + return mergedChildren; +} + +/* + * mergeConsecutiveAlts + * Merge consecutive identical ALT nodes into a GROUP. + * + * Example: + * (A | B) (A | B) (A | B) -> (A | B){3} + * + * After GROUP{1,1} unwrap, bare alternations like (A | B) become ALT nodes + * in the SEQ. This step detects consecutive identical ALT nodes and wraps + * them in a GROUP with the appropriate quantifier. 
+ */ +static List * +mergeConsecutiveAlts(List *children) +{ + ListCell *lc; + List *mergedChildren = NIL; + RPRPatternNode *prev = NULL; + int count = 0; + + foreach(lc, children) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + + if (child->nodeType == RPR_PATTERN_ALT && child->reluctant == false) + { + if (prev != NULL && + rprPatternChildrenEqual(prev->children, child->children)) + { + /* Same ALT as prev - accumulate */ + count++; + } + else + { + /* Different ALT or first ALT - flush previous */ + if (prev != NULL) + { + if (count > 1) + { + /* Wrap in GROUP{count,count}(ALT) */ + RPRPatternNode *group = makeNode(RPRPatternNode); + + group->nodeType = RPR_PATTERN_GROUP; + group->min = count; + group->max = count; + group->reluctant = false; + group->reluctant_location = -1; + group->location = -1; + group->children = list_make1(prev); + mergedChildren = lappend(mergedChildren, group); + } + else + mergedChildren = lappend(mergedChildren, prev); + } + prev = child; + count = 1; + } + } + else + { + /* Non-ALT - flush previous */ + if (prev != NULL) + { + if (count > 1) + { + RPRPatternNode *group = makeNode(RPRPatternNode); + + group->nodeType = RPR_PATTERN_GROUP; + group->min = count; + group->max = count; + group->reluctant = false; + group->reluctant_location = -1; + group->location = -1; + group->children = list_make1(prev); + mergedChildren = lappend(mergedChildren, group); + } + else + mergedChildren = lappend(mergedChildren, prev); + } + mergedChildren = lappend(mergedChildren, child); + prev = NULL; + count = 0; + } + } + + /* Flush remaining */ + if (prev != NULL) + { + if (count > 1) + { + RPRPatternNode *group = makeNode(RPRPatternNode); + + group->nodeType = RPR_PATTERN_GROUP; + group->min = count; + group->max = count; + group->reluctant = false; + group->reluctant_location = -1; + group->location = -1; + group->children = list_make1(prev); + mergedChildren = lappend(mergedChildren, group); + } + else + mergedChildren = 
lappend(mergedChildren, prev); + } + + return mergedChildren; +} + +/* + * mergeGroupPrefixSuffix + * Merge sequence prefix/suffix into GROUP with matching children. + * + * When a GROUP's children appear as a prefix before and/or suffix after + * the GROUP in a SEQ, absorb them by incrementing the GROUP's quantifier. + * This runs iteratively: A B A B (A B)+ A B -> (A B){4,}. + * + * Algorithm: + * For each GROUP encountered in the sequence: + * 1. PREFIX phase: compare the last N elements already in the result + * list against the GROUP's children. On match, remove them from + * result and increment the GROUP's min/max. Repeat until no match. + * 2. SUFFIX phase: compare the next N elements in the input against + * the GROUP's children. On match, skip them (via skipUntil) and + * increment min/max. Repeat until no match. + * + * Examples: + * A B (A B)+ -> (A B){2,} + * (A B)+ A B -> (A B){2,} + * A B (A B)+ A B -> (A B){3,} + */ +static List * +mergeGroupPrefixSuffix(List *children) +{ + List *result = NIL; + int numChildren = list_length(children); + int i; + int skipUntil = -1; /* skip suffix elements already absorbed */ + + for (i = 0; i < numChildren; i++) + { + RPRPatternNode *child = (RPRPatternNode *) list_nth(children, i); + + /* + * The suffix absorption logic below adjusts i to skip absorbed + * elements, ensuring we never revisit them. Verify this invariant. + */ + Assert(i >= skipUntil); + + /* + * If this is a GROUP, see if preceding/following elements match its + * children. GROUP's content may be wrapped in a SEQ - unwrap for + * comparison. + */ + if (child->nodeType == RPR_PATTERN_GROUP && child->reluctant == false) + { + List *groupContent = child->children; + int groupChildCount; + int prefixLen = list_length(result); + List *trimmed; + + /* + * If GROUP has single SEQ child, compare with SEQ's children. + * e.g., (A B)+ internally contains sequence A B; compare against + * that.
+ */ + if (list_length(groupContent) == 1) + { + RPRPatternNode *inner = (RPRPatternNode *) linitial(groupContent); + + if (inner->nodeType == RPR_PATTERN_SEQ) + groupContent = inner->children; + } + + groupChildCount = list_length(groupContent); + + /* + * PREFIX MERGE: Check if preceding elements match. Keep absorbing + * as long as we have matching prefixes. + */ + while (prefixLen >= groupChildCount && groupChildCount > 0) + { + List *prefixElements = NIL; + int j; + + /* Extract last groupChildCount elements from prefix */ + for (j = prefixLen - groupChildCount; j < prefixLen; j++) + { + prefixElements = lappend(prefixElements, + list_nth(result, j)); + } + + /* Compare with GROUP's (possibly unwrapped) children */ + if (rprPatternChildrenEqual(prefixElements, groupContent) && + child->min < RPR_QUANTITY_INF - 1 && + (child->max == RPR_QUANTITY_INF || + child->max < RPR_QUANTITY_INF - 1)) + { + /* + * Match! Merge by incrementing GROUP's quantifier. Remove + * the prefix elements from output. + */ + child->min += 1; + if (child->max != RPR_QUANTITY_INF) + child->max += 1; + + /* Rebuild result without matched prefix */ + trimmed = NIL; + for (j = 0; j < prefixLen - groupChildCount; j++) + { + trimmed = lappend(trimmed, + list_nth(result, j)); + } + result = trimmed; + prefixLen = list_length(result); + } + else + { + list_free(prefixElements); + break; + } + + list_free(prefixElements); + } + + /* + * SUFFIX MERGE: Check if following elements match. Keep absorbing + * as long as we have matching suffixes. 
+ */ + while (i + groupChildCount < numChildren && groupChildCount > 0) + { + List *suffixElements = NIL; + int j; + int suffixStart = i + 1; + + /* suffixStart always >= skipUntil after i adjustment */ + Assert(skipUntil <= suffixStart); + + /* Extract next groupChildCount elements as suffix */ + for (j = 0; j < groupChildCount; j++) + { + int idx = suffixStart + j; + + /* while condition guarantees idx < numChildren */ + Assert(idx < numChildren); + suffixElements = lappend(suffixElements, + list_nth(children, idx)); + } + + /* Compare with GROUP's children */ + if (list_length(suffixElements) == groupChildCount && + rprPatternChildrenEqual(suffixElements, groupContent) && + child->min < RPR_QUANTITY_INF - 1 && + (child->max == RPR_QUANTITY_INF || + child->max < RPR_QUANTITY_INF - 1)) + { + /* + * Match! Absorb suffix by incrementing quantifier and + * skipping. + */ + child->min += 1; + if (child->max != RPR_QUANTITY_INF) + child->max += 1; + skipUntil = suffixStart + groupChildCount; + + /* + * Update i to continue suffix check after absorbed + * elements + */ + i = skipUntil - 1; + } + else + { + list_free(suffixElements); + break; + } + + list_free(suffixElements); + } + } + + result = lappend(result, child); + } + + return result; +} + +/* + * optimizeSeqPattern + * Optimize SEQ pattern node. + * + * Optimizations: + * 1. Flatten nested SEQ and GROUP{1,1} + * 2. Merge consecutive identical VAR nodes + * 3. Merge consecutive identical GROUP nodes + * 4. Merge consecutive identical ALT nodes into GROUP + * 5. Merge prefix/suffix into GROUP with matching children + * 6. 
Unwrap single-item SEQ + */ +static RPRPatternNode * +optimizeSeqPattern(RPRPatternNode *pattern) +{ + /* Recursively optimize children and flatten nested SEQ/GROUP{1,1} */ + pattern->children = flattenSeqChildren(pattern->children); + + /* Merge consecutive identical VAR nodes */ + pattern->children = mergeConsecutiveVars(pattern->children); + + /* Merge consecutive identical GROUP nodes */ + pattern->children = mergeConsecutiveGroups(pattern->children); + + /* Merge consecutive identical ALT nodes into GROUP */ + pattern->children = mergeConsecutiveAlts(pattern->children); + + /* Merge prefix/suffix into GROUP with matching children */ + pattern->children = mergeGroupPrefixSuffix(pattern->children); + + /* Unwrap single-item SEQ */ + return tryUnwrapSingleChild(pattern); +} + +/* + * flattenAltChildren + * Recursively optimize children and flatten nested ALT nodes. + * + * Example: + * (A | (B | C)) -> (A | B | C) + * + * Returns a new list with optimized children, with nested ALT children + * flattened into the parent list. + */ +static List * +flattenAltChildren(List *children) +{ + ListCell *lc; + List *newChildren = NIL; + + foreach(lc, children) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + RPRPatternNode *opt = optimizeRPRPattern(child); + + if (opt->nodeType == RPR_PATTERN_ALT) + newChildren = list_concat(newChildren, list_copy(opt->children)); + else + newChildren = lappend(newChildren, opt); + } + + return newChildren; +} + +/* + * removeDuplicateAlternatives + * Remove duplicate alternatives from a list. + * + * Examples: + * (A | B | A) -> (A | B) + * (X | Y | X | Z | Y) -> (X | Y | Z) + * + * Returns a new list with only unique children (first occurrence kept). 
+ */ +static List * +removeDuplicateAlternatives(List *children) +{ + ListCell *lc; + ListCell *lc2; + List *uniqueChildren = NIL; + + foreach(lc, children) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + bool isDuplicate = false; + + foreach(lc2, uniqueChildren) + { + if (rprPatternEqual((RPRPatternNode *) lfirst(lc2), child)) + { + isDuplicate = true; + break; + } + } + + if (!isDuplicate) + uniqueChildren = lappend(uniqueChildren, child); + } + + return uniqueChildren; +} + +/* + * optimizeAltPattern + * Optimize ALT pattern node. + * + * Optimizations: + * 1. Flatten nested ALT + * 2. Remove duplicate alternatives + * 3. Unwrap single-item ALT + */ +static RPRPatternNode * +optimizeAltPattern(RPRPatternNode *pattern) +{ + /* Recursively optimize children and flatten nested ALT */ + pattern->children = flattenAltChildren(pattern->children); + + /* Remove duplicate alternatives */ + pattern->children = removeDuplicateAlternatives(pattern->children); + + /* Unwrap single-item ALT */ + return tryUnwrapSingleChild(pattern); +} + +/* + * tryMultiplyQuantifiers + * Try to multiply quantifiers. + * + * Multiplication is SAFE when: + * 1. Both unbounded: (A*)* -> A*, (A+)+ -> A+ + * 2. Outer exact: (A{m,n}){k} -> A{m*k, n*k} + * 3. Outer range + child {1,1}: (A){2,} -> A{2,} + * + * Multiplication is NOT safe when: + * - Only child unbounded: (A+){3} has different semantics + * - Outer range + child not {1,1}: gaps possible + * e.g., (A{2}){2,3} yields 4,6 only (not 4,5,6) + * + * Returns the child node with multiplied quantifiers if successful, + * otherwise returns the original pattern unchanged. 
+ */ +static RPRPatternNode * +tryMultiplyQuantifiers(RPRPatternNode *pattern) +{ + RPRPatternNode *child; + int64 new_min_64; + int64 new_max_64; + + /* Parser always creates GROUP with exactly one child */ + Assert(list_length(pattern->children) == 1); + + if (pattern->reluctant) + return pattern; + + child = (RPRPatternNode *) linitial(pattern->children); + + if ((child->nodeType != RPR_PATTERN_VAR && + child->nodeType != RPR_PATTERN_GROUP) || + child->reluctant) + return pattern; + + /* Case 1: Both unbounded - (A*)* -> A*, (A+)+ -> A+ */ + if (child->max == RPR_QUANTITY_INF && pattern->max == RPR_QUANTITY_INF) + { + new_min_64 = (int64) child->min * pattern->min; + if (new_min_64 >= RPR_QUANTITY_INF) + return pattern; /* overflow, skip optimization */ + + child->min = (int) new_min_64; + child->max = RPR_QUANTITY_INF; + return child; + } + + /*---------- + * Case 2: Outer exact (min == max): (A{2,3}){4} -> A{8,12}. + * Safe because every iteration produces the same range. + * + * Case 3: Child {1,1}: (A){2,5} -> A{2,5}. + * Safe because the child contributes exactly one per + * iteration, so the outer range maps directly. + * + * Unsafe example: (A{2}){2,3} produces counts 4 or 6 only, not the full + * range 4..6, so we cannot flatten when child has a non-trivial range AND + * outer is also a range. 
+ *---------- + */ + if (child->max != RPR_QUANTITY_INF && + (pattern->min == pattern->max || + (child->min == 1 && child->max == 1))) + { + new_min_64 = (int64) pattern->min * child->min; + if (new_min_64 >= RPR_QUANTITY_INF) + return pattern; + + /* Outer unbounded: result is unbounded regardless of child */ + if (pattern->max == RPR_QUANTITY_INF) + new_max_64 = RPR_QUANTITY_INF; + else + { + new_max_64 = (int64) pattern->max * child->max; + + if (new_max_64 >= RPR_QUANTITY_INF) + return pattern; + } + + child->min = (int) new_min_64; + child->max = (int) new_max_64; + return child; + } + + /* Not safe to multiply */ + return pattern; +} + +/* + * tryUnwrapGroup + * Try to unwrap GROUP{1,1} node. + * + * Examples: + * (A){1,1} -> A + * (A B){1,1} -> SEQ(A, B) (unwraps the inner SEQ) + * (A)? -> A? (propagate quantifier to single VAR child) + * (A)+? -> A+? (propagate quantifier including reluctant) + * + * If GROUP has min=1, max=1, return the child directly (reluctant on + * {1,1} is meaningless). If GROUP has a single VAR child with default + * quantifier {1,1}, propagate the GROUP's quantifier to the child and + * unwrap. Otherwise returns the pattern unchanged. + * + * Note: Parser always creates GROUP with exactly one child via list_make1(). + */ +static RPRPatternNode * +tryUnwrapGroup(RPRPatternNode *pattern) +{ + RPRPatternNode *child; + + /* Parser always creates GROUP with single child */ + Assert(list_length(pattern->children) == 1); + + child = (RPRPatternNode *) linitial(pattern->children); + + /* GROUP{1,1}: unwrap directly (reluctant on {1,1} is meaningless) */ + if (pattern->min == 1 && pattern->max == 1) + return child; + + /* + * Single VAR child with default {1,1}: propagate GROUP's quantifier to + * the child and unwrap. E.g., (A)?? -> A??, (A)+? -> A+? 
+ */ + if (child->nodeType == RPR_PATTERN_VAR && + child->min == 1 && child->max == 1 && child->reluctant == false) + { + child->min = pattern->min; + child->max = pattern->max; + child->reluctant = pattern->reluctant; + child->reluctant_location = pattern->reluctant_location; + return child; + } + + return pattern; +} + +/* + * optimizeGroupPattern + * Optimize GROUP pattern node. + * + * Optimizations: + * 1. Quantifier multiplication: (A{m}){n} -> A{m*n} + * 2. Unwrap GROUP{1,1} + */ +static RPRPatternNode * +optimizeGroupPattern(RPRPatternNode *pattern) +{ + ListCell *lc; + List *newChildren; + RPRPatternNode *result; + + /* Recursively optimize children */ + newChildren = NIL; + foreach(lc, pattern->children) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + + newChildren = lappend(newChildren, optimizeRPRPattern(child)); + } + pattern->children = newChildren; + + /* Try quantifier multiplication */ + result = tryMultiplyQuantifiers(pattern); + if (result != pattern) + return result; + + /* Try unwrapping GROUP{1,1} */ + return tryUnwrapGroup(pattern); +} + +/* + * optimizeRPRPattern + * Optimize RPRPatternNode tree (dispatcher). + * + * Dispatches to type-specific optimization functions. + * Returns the optimized pattern (may be a different node). + */ +static RPRPatternNode * +optimizeRPRPattern(RPRPatternNode *pattern) +{ + /* Pattern nodes from parser are never NULL */ + Assert(pattern != NULL); + + check_stack_depth(); + + switch (pattern->nodeType) + { + case RPR_PATTERN_VAR: + return pattern; + case RPR_PATTERN_SEQ: + return optimizeSeqPattern(pattern); + case RPR_PATTERN_ALT: + return optimizeAltPattern(pattern); + case RPR_PATTERN_GROUP: + return optimizeGroupPattern(pattern); + } + + return pattern; /* keep compiler quiet */ +} + +/* + * collectDefineVariables + * Collect variable names from DEFINE clause. + * + * Populates varNames array with variable names in DEFINE order. 
+ * This ensures varId == defineIdx, eliminating runtime mapping. + * Returns the number of variables collected. + */ +static int +collectDefineVariables(List *defineVariableList, char **varNames) +{ + ListCell *lc; + int numVars = 0; + + foreach(lc, defineVariableList) + { + /* Parser already checked this limit in transformDefineClause */ + Assert(numVars < RPR_VARID_MAX); + + varNames[numVars++] = strVal(lfirst(lc)); + } + + return numVars; +} + +/* + * scanRPRPatternRecursive + * Recursively scan pattern AST (pass 1 internal). + * + * Collects unique variable names and counts elements while tracking depth. + * Variables from DEFINE clause are already in varNames; this adds any + * additional variables found in the pattern. + */ +static void +scanRPRPatternRecursive(RPRPatternNode *node, char **varNames, int *numVars, + int *numElements, RPRDepth depth, RPRDepth *maxDepth) +{ + ListCell *lc; + int i; + + /* Pattern nodes from parser are never NULL */ + Assert(node != NULL); + + check_stack_depth(); + + /* Check recursion depth limit before overflow occurs */ + if (depth >= RPR_DEPTH_MAX) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("pattern nesting too deep"), + errdetail("Pattern nesting depth %d exceeds maximum %d.", + depth, RPR_DEPTH_MAX - 1))); + + /* Track maximum depth */ + if (depth > *maxDepth) + *maxDepth = depth; + + switch (node->nodeType) + { + case RPR_PATTERN_VAR: + /* Count element */ + (*numElements)++; + + /* Collect variable name if not already present */ + for (i = 0; i < *numVars; i++) + { + if (strcmp(varNames[i], node->varName) == 0) + return; /* Already have this variable */ + } + + /* + * Variable not in DEFINE clause - this is valid per SQL standard. + * Such variables are implicitly TRUE. Add to varNames so they get + * a varId >= defineVariableList length, which executor treats as + * TRUE. 
+ */ + Assert(*numVars < RPR_VARID_MAX); + varNames[(*numVars)++] = node->varName; + break; + + case RPR_PATTERN_SEQ: + /* Sequence: just recurse into children */ + foreach(lc, node->children) + { + scanRPRPatternRecursive((RPRPatternNode *) lfirst(lc), varNames, + numVars, numElements, depth, maxDepth); + } + break; + + case RPR_PATTERN_GROUP: + /* Add BEGIN element if group has non-trivial quantifier */ + if (node->min != 1 || node->max != 1) + (*numElements)++; + + /* Recurse into children at increased depth */ + foreach(lc, node->children) + { + scanRPRPatternRecursive((RPRPatternNode *) lfirst(lc), varNames, + numVars, numElements, depth + 1, maxDepth); + } + + /* Add END element if group has non-trivial quantifier */ + if (node->min != 1 || node->max != 1) + (*numElements)++; + break; + + case RPR_PATTERN_ALT: + /* Count ALT start element */ + (*numElements)++; + + /* Recurse into children at increased depth */ + foreach(lc, node->children) + { + scanRPRPatternRecursive((RPRPatternNode *) lfirst(lc), varNames, + numVars, numElements, depth + 1, maxDepth); + } + break; + } +} + +/* + * scanRPRPattern + * Scan pattern AST (pass 1 entry point). + * + * Collects unique variable names (appending to those from DEFINE clause), + * counts total elements (including FIN marker), and tracks maximum depth. + * Reports error if element count exceeds RPR_ELEMIDX_MAX. + */ +static void +scanRPRPattern(RPRPatternNode *node, char **varNames, int *numVars, + int *numElements, RPRDepth *maxDepth) +{ + *numElements = 0; + *maxDepth = 0; + + scanRPRPatternRecursive(node, varNames, numVars, numElements, 0, maxDepth); + + (*numElements)++; /* +1 for FIN marker */ + + if (*numElements > RPR_ELEMIDX_MAX) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("pattern too complex"), + errdetail("Pattern has %d elements, maximum is %d.", + *numElements, RPR_ELEMIDX_MAX))); +} + +/* + * allocateRPRPattern + * Allocate and initialize RPRPattern structure. 
+ * + * Creates the pattern structure, copies variable names, and allocates + * the elements array. The elements array is zero-initialized. + */ +static RPRPattern * +allocateRPRPattern(int numVars, int numElements, RPRDepth maxDepth, + char **varNamesStack) +{ + RPRPattern *result; + int i; + + result = makeNode(RPRPattern); + result->numVars = numVars; + result->maxDepth = maxDepth + 1; /* +1: depth is 0-based */ + result->numElements = numElements; + + /* Copy varNames (pattern must have at least one variable) */ + Assert(numVars > 0); + result->varNames = palloc(numVars * sizeof(char *)); + for (i = 0; i < numVars; i++) + result->varNames[i] = pstrdup(varNamesStack[i]); + + /* Allocate elements array (zero-init for reserved fields) */ + Assert(numElements >= 2); + result->elements = palloc0(numElements * sizeof(RPRPatternElement)); + + return result; +} + +/* + * getVarIdFromPattern + * Get variable ID for a variable name from RPRPattern. + * + * Returns the index of the variable in the varNames array. + */ +static RPRVarId +getVarIdFromPattern(RPRPattern *pat, const char *varName) +{ + for (int i = 0; i < pat->numVars; i++) + { + if (strcmp(pat->varNames[i], varName) == 0) + return (RPRVarId) i; + } + + /* Should not happen - variable should already be collected */ + elog(ERROR, "pattern variable \"%s\" not found", varName); + pg_unreachable(); +} + +/* + * fillRPRPatternVar + * Fill a VAR pattern element. + * + * Returns true if this VAR is nullable (can match zero rows). + */ +static bool +fillRPRPatternVar(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth depth) +{ + RPRPatternElement *elem = &pat->elements[*idx]; + + memset(elem, 0, sizeof(RPRPatternElement)); + elem->varId = getVarIdFromPattern(pat, node->varName); + elem->depth = depth; + elem->min = node->min; + elem->max = (node->max == INT_MAX) ? 
RPR_QUANTITY_INF : node->max; + Assert(elem->min >= 0 && elem->min < RPR_QUANTITY_INF && + elem->max >= 1 && + (elem->max == RPR_QUANTITY_INF || elem->min <= elem->max)); + elem->next = RPR_ELEMIDX_INVALID; + elem->jump = RPR_ELEMIDX_INVALID; + if (node->reluctant) + elem->flags |= RPR_ELEM_RELUCTANT; + (*idx)++; + + return (node->min == 0); +} + +/* + * fillRPRPatternGroup + * Fill a GROUP pattern and its children. + * + * Creates elements for group content at increased depth, plus BEGIN/END + * marker pair if the group has a non-trivial quantifier (not {1,1}). + * + * Element layout for (A B){2,3}: + * + * [BEGIN] [A] [B] [END] [next element...] + * | | ^ + * | +-- jump --+ (loop back to first child) + * +---- jump -------------------+ (skip to after END) + * + * BEGIN.jump points past END (skip path when count >= max or min == 0). + * END.jump points to the first child (loop-back path). + * BEGIN.next and END.next are set later by finalizeRPRPattern(). + * + * Returns true if this group is nullable. A group is nullable when its + * min is 0 (can be skipped entirely) or its body is nullable (every path + * through the body can match zero rows). + */ +static bool +fillRPRPatternGroup(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth depth) +{ + ListCell *lc; + int groupStartIdx = *idx; + int beginIdx = -1; + bool bodyNullable = true; + + /* Add BEGIN marker if group has non-trivial quantifier */ + if (node->min != 1 || node->max != 1) + { + RPRPatternElement *elem = &pat->elements[*idx]; + + beginIdx = *idx; + memset(elem, 0, sizeof(RPRPatternElement)); + elem->varId = RPR_VARID_BEGIN; + elem->depth = depth; + elem->min = node->min; + elem->max = (node->max == INT_MAX) ? 
RPR_QUANTITY_INF : node->max; + Assert(elem->min >= 0 && elem->min < RPR_QUANTITY_INF && + elem->max >= 1 && + (elem->max == RPR_QUANTITY_INF || elem->min <= elem->max)); + elem->next = RPR_ELEMIDX_INVALID; /* set by finalize */ + elem->jump = RPR_ELEMIDX_INVALID; /* set after END */ + if (node->reluctant) + elem->flags |= RPR_ELEM_RELUCTANT; + (*idx)++; + groupStartIdx = *idx; /* children start after BEGIN */ + } + + foreach(lc, node->children) + { + if (!fillRPRPattern((RPRPatternNode *) lfirst(lc), pat, idx, depth + 1)) + bodyNullable = false; + } + + /* Add group end marker if group has non-trivial quantifier */ + if (node->min != 1 || node->max != 1) + { + RPRPatternElement *beginElem = &pat->elements[beginIdx]; + RPRPatternElement *endElem = &pat->elements[*idx]; + + memset(endElem, 0, sizeof(RPRPatternElement)); + endElem->varId = RPR_VARID_END; + endElem->depth = depth; + endElem->min = node->min; + endElem->max = (node->max == INT_MAX) ? RPR_QUANTITY_INF : node->max; + Assert(endElem->min >= 0 && endElem->min < RPR_QUANTITY_INF && + endElem->max >= 1 && + (endElem->max == RPR_QUANTITY_INF || endElem->min <= endElem->max)); + endElem->next = RPR_ELEMIDX_INVALID; + endElem->jump = groupStartIdx; /* loop to first child */ + if (node->reluctant) + endElem->flags |= RPR_ELEM_RELUCTANT; + + /* + * If the group body is nullable (all paths can match empty), mark the + * END element so that nfa_advance_end can fast-forward the iteration + * count to min when reached via empty-match skip paths. + */ + if (bodyNullable) + endElem->flags |= RPR_ELEM_EMPTY_LOOP; + + (*idx)++; + + /* Set BEGIN skip pointer (next is set by finalize) */ + beginElem->jump = *idx; /* skip: go to after END */ + } + + return (node->min == 0 || bodyNullable); +} + +/* + * fillRPRPatternAlt + * Fill an ALT pattern and its alternatives. + * + * Creates ALT_START marker, fills each alternative at increased depth, + * sets jump pointers for backtracking, and next pointers for successful paths. 
+ * + * Returns true if any branch is nullable (OR semantics: one nullable + * branch suffices for the alternation to produce an empty match). + */ +static bool +fillRPRPatternAlt(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth depth) +{ + ListCell *lc; + ListCell *lc2; + RPRPatternElement *elem; + List *altBranchStarts = NIL; + List *altEndPositions = NIL; + int afterAltIdx; + bool anyNullable = false; + + /* Add alternation start marker */ + elem = &pat->elements[*idx]; + memset(elem, 0, sizeof(RPRPatternElement)); + elem->varId = RPR_VARID_ALT; + elem->depth = depth; + elem->min = 1; + elem->max = 1; + elem->next = RPR_ELEMIDX_INVALID; + elem->jump = RPR_ELEMIDX_INVALID; + (*idx)++; + + /* Fill each alternative */ + foreach(lc, node->children) + { + RPRPatternNode *alt = (RPRPatternNode *) lfirst(lc); + int branchStart = *idx; + + altBranchStarts = lappend_int(altBranchStarts, branchStart); + if (fillRPRPattern(alt, pat, idx, depth + 1)) + anyNullable = true; + altEndPositions = lappend_int(altEndPositions, *idx - 1); + } + + /* Set jump on first element of each alternative to next alternative */ + foreach(lc, altBranchStarts) + { + int firstElemIdx = lfirst_int(lc); + + if (lnext(altBranchStarts, lc) != NULL) + pat->elements[firstElemIdx].jump = lfirst_int(lnext(altBranchStarts, lc)); + } + + /* Set next on last element of each alternative to after the alternation */ + afterAltIdx = *idx; + lc2 = list_head(altBranchStarts); + + foreach(lc, altEndPositions) + { + int endPos = lfirst_int(lc); + int branchStart = lfirst_int(lc2); + + if (pat->elements[endPos].next != RPR_ELEMIDX_INVALID) + { + /* + * An inner ALT already set next on this element. Redirect all + * elements in this branch that share the same target to point to + * after this ALT instead. 
+ */ + int oldTarget = pat->elements[endPos].next; + int j; + + for (j = branchStart; j <= endPos; j++) + { + if (pat->elements[j].next == oldTarget) + pat->elements[j].next = afterAltIdx; + } + } + else + { + pat->elements[endPos].next = afterAltIdx; + } + + lc2 = lnext(altBranchStarts, lc2); + } + + list_free(altBranchStarts); + list_free(altEndPositions); + + return anyNullable; +} + +/* + * fillRPRPattern + * Fill pattern elements array from AST (pass 2). + * + * Recursively traverses AST and populates pre-allocated elements array. + * Dispatches to type-specific fill functions. + * + * Returns true if the pattern is nullable (can match zero rows). + * For SEQ nodes, all children must be nullable (AND). + */ +static bool +fillRPRPattern(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth depth) +{ + ListCell *lc; + bool allNullable = true; + + /* Pattern nodes from parser are never NULL */ + Assert(node != NULL); + + check_stack_depth(); + + switch (node->nodeType) + { + case RPR_PATTERN_SEQ: + foreach(lc, node->children) + { + if (!fillRPRPattern((RPRPatternNode *) lfirst(lc), pat, idx, depth)) + allNullable = false; + } + return allNullable; + + case RPR_PATTERN_VAR: + return fillRPRPatternVar(node, pat, idx, depth); + + case RPR_PATTERN_GROUP: + return fillRPRPatternGroup(node, pat, idx, depth); + + case RPR_PATTERN_ALT: + return fillRPRPatternAlt(node, pat, idx, depth); + } + + return false; /* unreachable */ +} + +/* + * finalizeRPRPattern + * Finalize pattern structure after filling elements. + * + * This performs: + * 1. Initialize absorption flag to false + * 2. Set up next pointers for sequential flow + * 3. 
Add FIN marker at the end + */ +static void +finalizeRPRPattern(RPRPattern *result) +{ + int finIdx = result->numElements - 1; + int i; + RPRPatternElement *finElem; + + /* Initialize absorption flag */ + result->isAbsorbable = false; + + /* Set up next pointers for elements that don't have one */ + for (i = 0; i < finIdx; i++) + { + RPRPatternElement *elem = &result->elements[i]; + + if (elem->next == RPR_ELEMIDX_INVALID) + elem->next = (i < finIdx - 1) ? i + 1 : finIdx; + + /* Verify quantifier range is valid */ + Assert(elem->min >= 0 && elem->min < RPR_QUANTITY_INF && + elem->max >= 1 && + (elem->max == RPR_QUANTITY_INF || elem->min <= elem->max)); + } + + /* Add FIN marker at the end */ + finElem = &result->elements[finIdx]; + memset(finElem, 0, sizeof(RPRPatternElement)); + finElem->varId = RPR_VARID_FIN; + finElem->depth = 0; + finElem->min = 1; + finElem->max = 1; + finElem->next = RPR_ELEMIDX_INVALID; + finElem->jump = RPR_ELEMIDX_INVALID; +} + +/*------------------------------------------------------------------------- + * CONTEXT ABSORPTION: TWO-FLAG DESIGN + *------------------------------------------------------------------------- + * + * Context absorption eliminates redundant match searches by absorbing newer + * contexts that cannot produce longer matches than older contexts. This + * achieves O(n^2) -> O(n) performance improvement for patterns like A+ B. + * + * Core Insight: + * For pattern A+ B, if Ctx1 starts at row 0 and Ctx2 starts at row 1, + * both matching A continuously, Ctx1 will always have more A matches. + * When B finally appears, Ctx1's match (0 to current) is always longer + * than Ctx2's match (1 to current). So Ctx2 can be safely eliminated. + * + * Two Flags: + * 1. RPR_ELEM_ABSORBABLE - "Absorption judgment point" + * WHERE contexts can be compared for absorption. + * - Simple unbounded VAR (A+): the VAR element itself + * - Unbounded GROUP ((A B)+): the END element only + * + * 2. 
RPR_ELEM_ABSORBABLE_BRANCH - "Absorbable region marker" + * ALL elements within the absorbable region. + * - Used for tracking state.isAbsorbable at runtime + * - States leaving this region become non-absorbable permanently + * + * Why Two Flags? + * For pattern "(A B)+", contexts at different positions (one at A, + * another at B) cannot be compared - they must synchronize at END. + * + * Example: "(A B)+" with input A B A B A B... + * Row 0 (A): Ctx1 starts, matches A + * Row 1 (B): Ctx1 matches B -> END (count=1) + * Row 2 (A): Ctx1 loops to A, Ctx2 starts at A + * Row 3 (B): Ctx1 at END (count=2), Ctx2 at END (count=1) + * -> Both at END, comparable! Ctx1 absorbs Ctx2. + * + * Contexts synchronize at END every group-length rows. Therefore: + * - ABSORBABLE marks END as judgment point (where to compare) + * - ABSORBABLE_BRANCH keeps state.isAbsorbable=true through A->B->END + * + * Pattern Examples: + * + * Pattern: A+ B + * Element 0 (A): ABSORBABLE | ABSORBABLE_BRANCH <- judgment point + * Element 1 (B): (none) + * -> Compare at A every row. When contexts move to B, absorption stops. + * + * Pattern: (A B)+ C + * Element 0 (A): ABSORBABLE_BRANCH + * Element 1 (B): ABSORBABLE_BRANCH + * Element 2 (END): ABSORBABLE | ABSORBABLE_BRANCH <- judgment point + * Element 3 (C): (none) + * -> Compare at END every 2 rows. When contexts move to C, absorption stops. + * + * Pattern: (A+ B+)+ C + * Element 0 (A): ABSORBABLE | ABSORBABLE_BRANCH <- only first A+ flagged + * Element 1 (B): (none) + * Element 2 (END): (none) + * Element 3 (C): (none) + * -> Only first unbounded portion (A+) gets flags. Absorption happens + * at A during first iteration. After moving to B+, absorption stops. + * + * First Unbounded Portion Strategy: + * The algorithm only flags the FIRST unbounded portion starting from + * element 0. 
This is sufficient because: + * - Absorption in first portion already achieves O(n) complexity + * - Later portions have different synchronization characteristics + * - Nested unbounded patterns are too complex for simple absorption + * - Complex patterns (nested groups, etc.) naturally die from mismatch + * + * Runtime Usage (in nodeWindowAgg.c): + * - state.isAbsorbable = (previous && elem.ABSORBABLE_BRANCH) + * - Monotonic: once false, stays false (cannot re-enter region) + * - context.hasAbsorbableState: can absorb others (>=1 absorbable state) + * - context.allStatesAbsorbable: can be absorbed (ALL states absorbable) + * - Absorption check: if Ctx1.hasAbsorbable && Ctx2.allAbsorbable, + * compare counts at same elemIdx, absorb if Ctx1.count >= Ctx2.count + * + *------------------------------------------------------------------------- + */ + +/* + * isFixedLengthChildren + * Check if all children at scopeDepth have fixed-length quantifiers + * (min == max), recursively for nested subgroups. + * + * A fixed-length group is semantically equivalent to unrolling each child + * to {1,1} copies, which is the existing Case 2 already proven correct + * for absorption. This check recognizes fixed-length groups at compile + * time without actually unrolling them. + * + * Traverses the flat element array starting at idx. For VAR elements, + * checks min == max. For BEGIN elements (nested subgroups), recurses + * into the subgroup and also checks the subgroup's END quantifier. + * ALT elements are rejected (alternation inside absorbable group is + * not supported). + * + * Returns true if all children are fixed-length, stopping at the END + * element at scopeDepth - 1. 
+ */ +static bool +isFixedLengthChildren(RPRPattern *pattern, RPRElemIdx idx, RPRDepth scopeDepth) +{ + RPRPatternElement *e = &pattern->elements[idx]; + + check_stack_depth(); + + while (e->depth == scopeDepth) + { + if (RPRElemIsVar(e)) + { + if (e->min != e->max) + return false; + } + else if (RPRElemIsBegin(e)) + { + RPRElemIdx childIdx = e->next; + + /* Recurse into subgroup children at scopeDepth + 1 */ + if (!isFixedLengthChildren(pattern, childIdx, scopeDepth + 1)) + return false; + + /* Advance past the subgroup to its END element */ + e = &pattern->elements[e->next]; + while (e->depth > scopeDepth) + e = &pattern->elements[e->next]; + + /* e is now the END at scopeDepth; check its quantifier */ + Assert(RPRElemIsEnd(e) && e->depth == scopeDepth); + if (e->min != e->max) + return false; + } + else + { + /* ALT inside group: not supported for absorption */ + return false; + } + + Assert(e->next != RPR_ELEMIDX_INVALID); + e = &pattern->elements[e->next]; + } + + return true; +} + +/* + * isUnboundedStart + * Check if the element at idx starts an unbounded greedy sequence. + * + * For context absorption to work, the sequence starting at idx must be: + * - Unbounded (max = infinity) + * - Greedy (not reluctant) + * - At the start of current scope + * + * Two cases are handled: + * 1. Simple VAR: A+ B C - A has max=INF, gets both flags + * 2. Unbounded GROUP with fixed-length children: (A B{2})+ C + * All children must have min == max (recursively for nested subgroups). + * This is equivalent to unrolling to {1,1} VARs, e.g., (A B B)+ C. + * All elements within the group get ABSORBABLE_BRANCH. + * Only the unbounded END gets ABSORBABLE (judgment point). 
+ * Examples: + * (A B{2})+ C - B{2} has min==max, step=3 + * (A (B C){2} D)+ E - nested {2} subgroup, step=6 + * ((A (B C){2}){2})+ - doubly nested {2}, step=10 + * (A ((B C{3}){2} D){2} E)+ F - deep nesting, step=20 + * + * Returns false for patterns where absorption cannot work: + * - A B+ (unbounded not at start) + * - A+? B (reluctant quantifier) + * - (A | B)+ (ALT inside group) + * - (A B+)+ (variable-length element inside group) + * - (A B{2,5})+ (min != max inside group) + */ +static bool +isUnboundedStart(RPRPattern *pattern, RPRElemIdx idx) +{ + RPRPatternElement *elem = &pattern->elements[idx]; + RPRDepth startDepth = elem->depth; + RPRPatternElement *e; + + /* Case 1: Simple unbounded VAR at start (greedy only) */ + if (RPRElemIsVar(elem) && elem->max == RPR_QUANTITY_INF && + !RPRElemIsReluctant(elem)) + { + /* Set both flags on first element */ + elem->flags |= RPR_ELEM_ABSORBABLE_BRANCH | RPR_ELEM_ABSORBABLE; + return true; + } + + /* + * Case 2: Unbounded GROUP with fixed-length children. Each child must + * have min == max (recursively for nested subgroups), ensuring a fixed + * step size per iteration so that count-dominance holds. 
+ */ + if (!isFixedLengthChildren(pattern, idx, startDepth)) + return false; + + /* Find the END element at startDepth - 1 */ + e = &pattern->elements[idx]; + while (e->depth >= startDepth) + e = &pattern->elements[e->next]; + + /* END must be unbounded greedy */ + if (e->depth == startDepth - 1 && + RPRElemIsEnd(e) && e->max == RPR_QUANTITY_INF && + !RPRElemIsReluctant(e)) + { + Assert(e->jump == idx); /* END points back to first child */ + + /* Set ABSORBABLE_BRANCH on all children, ABSORBABLE on END only */ + for (e = elem; !RPRElemIsEnd(e) || e->depth >= startDepth; + e = &pattern->elements[e->next]) + e->flags |= RPR_ELEM_ABSORBABLE_BRANCH; + e->flags |= RPR_ELEM_ABSORBABLE_BRANCH | RPR_ELEM_ABSORBABLE; + return true; + } + + return false; +} + +/* + * computeAbsorbabilityRecursive + * Recursively check absorbability starting from given index. + * + * If the element at startIdx is ALT, recursively checks each branch independently. + * Each branch gets its own absorbability status, and if any branch is absorbable, + * the ALT element itself is marked with RPR_ELEM_ABSORBABLE_BRANCH. + * + * If BEGIN, skips to first child. + * + * Otherwise (VAR), checks if the element starts an unbounded sequence via + * isUnboundedStart. 
+ */ +static void +computeAbsorbabilityRecursive(RPRPattern *pattern, RPRElemIdx startIdx, + bool *hasAbsorbable) +{ + RPRPatternElement *elem = &pattern->elements[startIdx]; + + check_stack_depth(); + + if (RPRElemIsAlt(elem)) + { + /* ALT: recursively check each branch */ + RPRElemIdx branchIdx = elem->next; + RPRPatternElement *branchFirst; + bool branchAbsorbable; + + while (branchIdx != RPR_ELEMIDX_INVALID) + { + branchAbsorbable = false; + + Assert(branchIdx < pattern->numElements); + branchFirst = &pattern->elements[branchIdx]; + + /* Stop if element is outside ALT scope (not a branch) */ + if (branchFirst->depth <= elem->depth) + break; + + /* Recursively check this branch */ + computeAbsorbabilityRecursive(pattern, branchIdx, &branchAbsorbable); + if (branchAbsorbable) + { + *hasAbsorbable = true; + } + + branchIdx = branchFirst->jump; + } + + /* Mark ALT element if any branch is absorbable */ + if (*hasAbsorbable) + elem->flags |= RPR_ELEM_ABSORBABLE_BRANCH; + } + else if (RPRElemIsBegin(elem)) + { + /* + * BEGIN: first try to treat this BEGIN's children as an unbounded + * group directly (handles nested fixed-length groups like ((A{2} + * B{3}){2})+). If that fails, skip to first child and recurse as + * before. + */ + if (isUnboundedStart(pattern, elem->next)) + { + *hasAbsorbable = true; + elem->flags |= RPR_ELEM_ABSORBABLE_BRANCH; + } + else + { + computeAbsorbabilityRecursive(pattern, elem->next, hasAbsorbable); + + /* Mark BEGIN element if contents are absorbable */ + if (*hasAbsorbable) + elem->flags |= RPR_ELEM_ABSORBABLE_BRANCH; + } + } + else + { + /* Should never reach END - structural invariant of pattern AST */ + Assert(!RPRElemIsEnd(elem)); + + /* Non-ALT, non-BEGIN: check if unbounded start */ + if (isUnboundedStart(pattern, startIdx)) + { + *hasAbsorbable = true; + } + } +} + +/* + * computeAbsorbability + * Determine if pattern supports context absorption optimization. 
+ * + * Context absorption eliminates redundant match searches by absorbing + * newer contexts that cannot produce longer matches than older contexts. + * This achieves O(n^2) -> O(n) performance improvement. + * + * Only greedy unbounded quantifiers at pattern start can be absorbable. + * Reluctant quantifiers are excluded because they don't maintain monotonic + * decrease property required for safe absorption. + * + * This function sets two flags: + * RPR_ELEM_ABSORBABLE: Absorption judgment point + * - Simple unbounded VAR: the VAR itself (e.g., A in A+) + * - Unbounded GROUP: the END element (e.g., END in (A B)+) + * RPR_ELEM_ABSORBABLE_BRANCH: All elements in absorbable region + * - All elements within the same scope as unbounded start + * + * Examples: + * A+ B C - absorbable (A gets both flags) + * (A B)+ C - absorbable (A,B,END get BRANCH, END gets ABSORBABLE) + * A B+ - NOT absorbable (unbounded not at start) + * A+? B C - NOT absorbable (reluctant quantifier) + * (A+ B+)+ - only first A+ on first iteration (nested unbounded not supported) + * A+ | B+ - both branches absorbable independently + * A+ | C D - only A+ branch absorbable (C D branch not absorbable) + * ((A+ B) | C) D - nested ALT: A+ branch is absorbable + */ +static void +computeAbsorbability(RPRPattern *pattern) +{ + bool hasAbsorbable = false; + + /* Parser always produces at least one element + FIN */ + Assert(pattern->numElements >= 2); + + /* Start recursion from first element */ + computeAbsorbabilityRecursive(pattern, 0, &hasAbsorbable); + pattern->isAbsorbable = hasAbsorbable; +} + +/* + * collectPatternVariablesRecursive + * Recursively collect variable names from pattern AST. 
+ */ +static void +collectPatternVariablesRecursive(RPRPatternNode *node, List **varNames) +{ + ListCell *lc; + + Assert(node != NULL); + + check_stack_depth(); + + switch (node->nodeType) + { + case RPR_PATTERN_VAR: + /* Add variable if not already in list */ + foreach(lc, *varNames) + { + if (strcmp(strVal(lfirst(lc)), node->varName) == 0) + return; /* Already collected */ + } + *varNames = lappend(*varNames, makeString(pstrdup(node->varName))); + break; + + case RPR_PATTERN_SEQ: + case RPR_PATTERN_ALT: + case RPR_PATTERN_GROUP: + foreach(lc, node->children) + { + collectPatternVariablesRecursive((RPRPatternNode *) lfirst(lc), + varNames); + } + break; + } +} + +/* + * collectPatternVariables + * Collect unique variable names used in PATTERN clause. + * + * Returns a list of String nodes containing variable names. + * Used to filter defineClause before varId assignment. + */ +List * +collectPatternVariables(RPRPatternNode *pattern) +{ + List *varNames = NIL; + + /* Caller ensures pattern is not NULL */ + Assert(pattern != NULL); + + collectPatternVariablesRecursive(pattern, &varNames); + return varNames; +} + +/* + * buildDefineVariableList + * Build defineVariableList from defineClause. + * + * The parser already ensures that all DEFINE variables appear in PATTERN, + * so no filtering is needed here. + * + * Sets *defineVariableList to list of variable names (String nodes). + */ +void +buildDefineVariableList(List *defineClause, List **defineVariableList) +{ + ListCell *lc; + + *defineVariableList = NIL; + + foreach(lc, defineClause) + { + TargetEntry *te = (TargetEntry *) lfirst(lc); + + *defineVariableList = lappend(*defineVariableList, + makeString(pstrdup(te->resname))); + } +} + +/* + * buildRPRPattern + * Compile pattern AST to flat bytecode array. + * + * Compilation phases: + * 1. Optimize AST (flatten, merge, deduplicate) + * 2. Scan: collect variables, count elements (pass 1) + * 3. Allocate result structure + * 4. 
Fill elements from AST (pass 2) + * 5. Finalize pattern structure + * 6. Compute context absorption eligibility + * + * Called from createplan.c during plan creation. + */ +RPRPattern * +buildRPRPattern(RPRPatternNode *pattern, List *defineVariableList, + RPSkipTo rpSkipTo, int frameOptions, + bool hasMatchStartDependent) +{ + RPRPattern *result; + RPRPatternNode *optimized; + char *varNamesStack[RPR_VARID_MAX]; + int numVars; + int numElements; + RPRDepth maxDepth; + int idx; + bool hasLimitedFrame; + + /* Caller must check for NULL pattern before calling */ + Assert(pattern != NULL); + + /* Optimize the pattern tree */ + optimized = optimizeRPRPattern(copyObject(pattern)); + + /* Collect variable names from DEFINE clause */ + numVars = collectDefineVariables(defineVariableList, varNamesStack); + + /* Scan pattern: collect variables, count elements, validate limits */ + scanRPRPattern(optimized, varNamesStack, &numVars, &numElements, &maxDepth); + + /* Allocate result structure */ + result = allocateRPRPattern(numVars, numElements, maxDepth, varNamesStack); + + /* Fill elements (pass 2) */ + idx = 0; + fillRPRPattern(optimized, result, &idx, 0); + + /* Finalize: set up next pointers, flags, and add FIN marker */ + finalizeRPRPattern(result); + + /* + * Compute context absorption eligibility. Absorption requires both + * structural absorbability and runtime conditions. Check runtime + * conditions first to avoid unnecessary pattern analysis. + * + * Runtime conditions for absorption: + * + * 1. SKIP TO PAST LAST ROW required (not SKIP TO NEXT ROW): With NEXT + * ROW, after each match the search resumes from the next row, so contexts + * are immediately discarded. No redundant contexts accumulate, making + * absorption unnecessary. + * + * 2. Unbounded frame end required (not ROWS with bounded end): With a + * bounded frame (e.g., ROWS BETWEEN CURRENT ROW AND 10 FOLLOWING), + * matches may be truncated at frame boundaries. 
This changes the + * absorption semantics - older contexts don't necessarily produce longer + * matches when frame limits apply differently to each context. + */ + hasLimitedFrame = (frameOptions & FRAMEOPTION_ROWS) && + !(frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING); + + if (rpSkipTo == ST_PAST_LAST_ROW && !hasLimitedFrame && + !hasMatchStartDependent) + { + /* Runtime conditions met - check structural absorbability */ + computeAbsorbability(result); + } + + return result; +} diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index ff0e875f2a2..813a326bd78 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -214,7 +214,6 @@ static List *set_windowagg_runcondition_references(PlannerInfo *root, static void record_elided_node(PlannerGlobal *glob, int plan_node_id, NodeTag elided_type, Bitmapset *relids); - /***************************************************************************** * * SUBPLAN REFERENCES @@ -2633,6 +2632,34 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset) NRM_EQUAL, NUM_EXEC_QUAL(plan)); + /* + * Replace an expression tree in each DEFINE clause so that all Var + * nodes' varnos refer to OUTER_VAR. 
+ */ + if (IsA(plan, WindowAgg)) + { + WindowAgg *wplan = (WindowAgg *) plan; + + if (wplan->defineClause != NIL) + { + foreach(l, wplan->defineClause) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + tle = flatCopyTargetEntry(tle); + tle->expr = (Expr *) + fix_upper_expr(root, + (Node *) tle->expr, + subplan_itlist, + OUTER_VAR, + rtoffset, + NRM_EQUAL, + NUM_EXEC_QUAL(plan)); + lfirst(l) = tle; + } + } + } + pfree(subplan_itlist); } diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 4424fdbe906..02898a9106b 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -2593,6 +2593,15 @@ perform_pullup_replace_vars(PlannerInfo *root, parse->returningList = (List *) pullup_replace_vars((Node *) parse->returningList, rvcontext); + foreach(lc, parse->windowClause) + { + WindowClause *wc = lfirst_node(WindowClause, lc); + + if (wc->defineClause != NIL) + wc->defineClause = (List *) + pullup_replace_vars((Node *) wc->defineClause, rvcontext); + } + if (parse->onConflict) { parse->onConflict->onConflictSet = (List *) diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 14a1dfed2b9..fd4bdf2cb31 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -18,6 +18,7 @@ #include "access/stratnum.h" #include "nodes/bitmapset.h" #include "nodes/lockoptions.h" +#include "nodes/parsenodes.h" #include "nodes/primnodes.h" @@ -1244,6 +1245,70 @@ typedef struct Agg List *chain; } Agg; +/* ---------------- + * Row Pattern Recognition compiled pattern types + * ---------------- + */ + +/* Type definitions for RPR pattern elements */ +typedef uint8 RPRVarId; /* pattern variable ID */ +typedef uint8 RPRElemFlags; /* element flags */ +typedef uint8 RPRDepth; /* group nesting depth */ +typedef int32 RPRQuantity; /* quantifier min/max */ +typedef int16 RPRElemIdx; /* element array index */ + +/* + * RPRPatternElement - flat element 
for NFA pattern matching (16 bytes) + * + * Layout optimized for alignment (no padding holes): + * varId(1) + depth(1) + flags(1) + reserved(1) + min(4) + max(4) + next(2) + jump(2) + */ +typedef struct RPRPatternElement +{ + RPRVarId varId; /* variable ID, or special value for control */ + RPRDepth depth; /* group nesting depth */ + RPRElemFlags flags; /* flags (reluctant, etc.) */ + uint8 reserved; /* reserved padding byte */ + RPRQuantity min; /* quantifier minimum */ + RPRQuantity max; /* quantifier maximum */ + RPRElemIdx next; /* next element index */ + RPRElemIdx jump; /* jump target (for ALT/GROUP) */ +} RPRPatternElement; + +/* + * RPRPattern - compiled pattern for NFA execution + * + * Requires custom copy/out/read functions due to elements array. + */ +typedef struct RPRPattern +{ + pg_node_attr(custom_copy_equal, custom_read_write) + + NodeTag type; /* T_RPRPattern */ + int numVars; /* number of pattern variables */ + char **varNames; /* array of variable names (DEFINE order + * first) */ + RPRDepth maxDepth; /* maximum group nesting depth */ + int numElements; /* number of elements */ + RPRPatternElement *elements; /* array of pattern elements */ + + /*---------------- + * Context absorption optimization. + * + * Absorption is only safe when later matches are guaranteed to be + * suffixes of earlier matches. This requires simple pattern structure: + * + * Case 1: No ALT, single unbounded element (A+, (A B)+) + * Case 2: Top-level ALT with each branch being single unbounded (A+ | B+) + * + * Complex patterns like A B (A B)+ could theoretically be transformed to + * (A B){2,} for absorption, but this changes lexical order and is not + * implemented. Similarly, (A|B)+ cannot be absorbed because different + * start positions produce different match contents (not suffix relation). 
+ */ + bool isAbsorbable; /* true if pattern supports context absorption */ +} RPRPattern; + /* ---------------- * window aggregate node * ---------------- @@ -1314,6 +1379,44 @@ typedef struct WindowAgg /* nulls sort first for in_range tests? */ bool inRangeNullsFirst; + /* Row Pattern Recognition AFTER MATCH SKIP clause */ + RPSkipTo rpSkipTo; /* Row Pattern Skip To type */ + + /* Compiled Row Pattern for NFA execution */ + struct RPRPattern *rpPattern; + + /* Row Pattern DEFINE clause (list of TargetEntry) */ + List *defineClause; + + /* + * Bitmapset of DEFINE variable indices whose expressions depend on + * match_start (contain FIRST, LAST-with-offset, or compound + * PREV_FIRST/NEXT_FIRST/PREV_LAST/NEXT_LAST with offset). Variables in + * this set require per-context re-evaluation during NFA processing. + */ + Bitmapset *defineMatchStartDependent; + + /* + * Navigation offset status and values for tuplestore mark optimization. + * See RPRNavOffsetKind in nodes/parsenodes.h. + * + * navMaxOffset: maximum backward reach from currentpos (contributed by + * PREV, LAST-with-offset, compound PREV_LAST/NEXT_LAST). Only valid when + * navMaxOffsetKind == RPR_NAV_OFFSET_FIXED. + * + * navFirstOffset: minimum forward offset from match_start (contributed by + * FIRST, compound PREV_FIRST/NEXT_FIRST). Can be negative for compound + * PREV_FIRST. Only valid when navFirstOffsetKind == RPR_NAV_OFFSET_FIXED + * and hasFirstNav == true. 
+ */ + RPRNavOffsetKind navMaxOffsetKind; + int64 navMaxOffset; + + /* true if FIRST-based navigation (FIRST, PREV_FIRST, NEXT_FIRST) is used */ + bool hasFirstNav; + RPRNavOffsetKind navFirstOffsetKind; + int64 navFirstOffset; + /* * false for all apart from the WindowAgg that's closest to the root of * the plan diff --git a/src/include/optimizer/rpr.h b/src/include/optimizer/rpr.h new file mode 100644 index 00000000000..0a14cfad79b --- /dev/null +++ b/src/include/optimizer/rpr.h @@ -0,0 +1,65 @@ +/*------------------------------------------------------------------------- + * + * rpr.h + * Row Pattern Recognition pattern compilation for planner + * + * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/optimizer/rpr.h + * + *------------------------------------------------------------------------- + */ +#ifndef OPTIMIZER_RPR_H +#define OPTIMIZER_RPR_H + +#include "nodes/parsenodes.h" +#include "nodes/plannodes.h" + +/* Limits and special values */ +/* + * Maximum number of unique pattern variables (varId 0 to RPR_VARID_MAX - 1). + * Values from RPR_VARID_BEGIN (252) onward are reserved for control elements. 
+ */ +#define RPR_VARID_MAX 251 +#define RPR_QUANTITY_INF INT32_MAX /* unbounded quantifier */ +#define RPR_COUNT_MAX INT32_MAX /* max runtime count (NFA state) */ +#define RPR_ELEMIDX_MAX INT16_MAX /* max pattern elements */ +#define RPR_ELEMIDX_INVALID ((RPRElemIdx) -1) /* invalid index */ +#define RPR_DEPTH_MAX (UINT8_MAX - 1) /* max pattern nesting depth: 254 */ +#define RPR_DEPTH_NONE UINT8_MAX /* no enclosing group (top-level) */ + +/* Special varId values for control elements (252-255) */ +#define RPR_VARID_BEGIN ((RPRVarId) 252) /* group begin */ +#define RPR_VARID_END ((RPRVarId) 253) /* group end */ +#define RPR_VARID_ALT ((RPRVarId) 254) /* alternation start */ +#define RPR_VARID_FIN ((RPRVarId) 255) /* pattern finish */ + +/* Element flags */ +#define RPR_ELEM_RELUCTANT 0x01 /* reluctant (non-greedy) + * quantifier */ +#define RPR_ELEM_EMPTY_LOOP 0x02 /* END: group body can produce + * empty match */ +#define RPR_ELEM_ABSORBABLE_BRANCH 0x04 /* element in absorbable region */ +#define RPR_ELEM_ABSORBABLE 0x08 /* absorption judgment point */ + +/* Accessor macros for RPRPatternElement */ +#define RPRElemIsReluctant(e) (((e)->flags & RPR_ELEM_RELUCTANT) != 0) +#define RPRElemCanEmptyLoop(e) (((e)->flags & RPR_ELEM_EMPTY_LOOP) != 0) +#define RPRElemIsAbsorbableBranch(e) (((e)->flags & RPR_ELEM_ABSORBABLE_BRANCH) != 0) +#define RPRElemIsAbsorbable(e) (((e)->flags & RPR_ELEM_ABSORBABLE) != 0) +#define RPRElemIsVar(e) ((e)->varId <= RPR_VARID_MAX) +#define RPRElemIsBegin(e) ((e)->varId == RPR_VARID_BEGIN) +#define RPRElemIsEnd(e) ((e)->varId == RPR_VARID_END) +#define RPRElemIsAlt(e) ((e)->varId == RPR_VARID_ALT) +#define RPRElemIsFin(e) ((e)->varId == RPR_VARID_FIN) +#define RPRElemCanSkip(e) ((e)->min == 0) + +extern List *collectPatternVariables(RPRPatternNode *pattern); +extern void buildDefineVariableList(List *defineClause, + List **defineVariableList); +extern RPRPattern *buildRPRPattern(RPRPatternNode *pattern, List *defineVariableList, + RPSkipTo 
rpSkipTo, int frameOptions, + bool hasMatchStartDependent); + +#endif /* OPTIMIZER_RPR_H */ -- 2.43.0