From 949c3cce37f7ff03b9508002adb20530f09dbe68 Mon Sep 17 00:00:00 2001 From: Peter Smith Date: Tue, 23 Dec 2025 15:41:34 +1100 Subject: [PATCH v20251223] VCI - main - part5 --- contrib/vci/executor/Makefile | 33 + contrib/vci/executor/meson.build | 18 + contrib/vci/executor/vci_agg.c | 2040 ++++++++++++++++++++++ contrib/vci/executor/vci_aggmergetranstype.c | 133 ++ contrib/vci/executor/vci_aggref.c | 1287 ++++++++++++++ contrib/vci/executor/vci_executor.c | 2116 +++++++++++++++++++++++ contrib/vci/executor/vci_fetch_column_store.c | 1193 +++++++++++++ contrib/vci/executor/vci_gather.c | 157 ++ contrib/vci/executor/vci_param.c | 60 + contrib/vci/executor/vci_plan.c | 235 +++ contrib/vci/executor/vci_plan_func.c | 942 ++++++++++ contrib/vci/executor/vci_planner.c | 1911 ++++++++++++++++++++ contrib/vci/executor/vci_planner_preanalyze.c | 413 +++++ contrib/vci/executor/vci_scan.c | 631 +++++++ contrib/vci/executor/vci_sort.c | 413 +++++ contrib/vci/executor/vci_vector_executor.c | 2301 +++++++++++++++++++++++++ contrib/vci/include/vci_aggref.h | 227 +++ contrib/vci/include/vci_aggref_impl.inc | 873 ++++++++++ contrib/vci/include/vci_executor.h | 895 ++++++++++ contrib/vci/include/vci_fetch_row_store.h | 22 + contrib/vci/include/vci_planner.h | 151 ++ 21 files changed, 16051 insertions(+) create mode 100644 contrib/vci/executor/Makefile create mode 100644 contrib/vci/executor/meson.build create mode 100644 contrib/vci/executor/vci_agg.c create mode 100644 contrib/vci/executor/vci_aggmergetranstype.c create mode 100644 contrib/vci/executor/vci_aggref.c create mode 100644 contrib/vci/executor/vci_executor.c create mode 100644 contrib/vci/executor/vci_fetch_column_store.c create mode 100644 contrib/vci/executor/vci_gather.c create mode 100644 contrib/vci/executor/vci_param.c create mode 100644 contrib/vci/executor/vci_plan.c create mode 100644 contrib/vci/executor/vci_plan_func.c create mode 100644 contrib/vci/executor/vci_planner.c create mode 100644 
contrib/vci/executor/vci_planner_preanalyze.c create mode 100644 contrib/vci/executor/vci_scan.c create mode 100644 contrib/vci/executor/vci_sort.c create mode 100644 contrib/vci/executor/vci_vector_executor.c create mode 100644 contrib/vci/include/vci_aggref.h create mode 100644 contrib/vci/include/vci_aggref_impl.inc create mode 100644 contrib/vci/include/vci_executor.h create mode 100644 contrib/vci/include/vci_fetch_row_store.h create mode 100644 contrib/vci/include/vci_planner.h diff --git a/contrib/vci/executor/Makefile b/contrib/vci/executor/Makefile new file mode 100644 index 0000000..97b5c7b --- /dev/null +++ b/contrib/vci/executor/Makefile @@ -0,0 +1,33 @@ +# contrib/vci/executor/Makefile + +SUBOBJS = \ + vci_agg.o \ + vci_aggmergetranstype.o \ + vci_aggref.o \ + vci_executor.o \ + vci_fetch_column_store.o \ + vci_gather.o \ + vci_param.o \ + vci_plan.o \ + vci_planner.o \ + vci_planner_preanalyze.o \ + vci_plan_func.o \ + vci_scan.o \ + vci_sort.o \ + vci_vector_executor.o + +EXTRA_CLEAN = SUBSYS.o $(SUBOBJS) + +PG_CPPFLAGS = -I$(top_srcdir)/contrib/vci/include + +ifdef USE_PGXS +PGXS := $(shell pg_config --pgxs) +include $(PGXS) +else +subdir = contrib/vci/executor +top_builddir = ../../.. 
+include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif + +override CFLAGS += $(CFLAGS_SL) diff --git a/contrib/vci/executor/meson.build b/contrib/vci/executor/meson.build new file mode 100644 index 0000000..7f9fcc2 --- /dev/null +++ b/contrib/vci/executor/meson.build @@ -0,0 +1,18 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +vci_executor_sources = files( + 'vci_agg.c', + 'vci_aggmergetranstype.c', + 'vci_aggref.c', + 'vci_executor.c', + 'vci_fetch_column_store.c', + 'vci_gather.c', + 'vci_param.c', + 'vci_plan.c', + 'vci_planner.c', + 'vci_planner_preanalyze.c', + 'vci_plan_func.c', + 'vci_scan.c', + 'vci_sort.c', + 'vci_vector_executor.c', +) diff --git a/contrib/vci/executor/vci_agg.c b/contrib/vci/executor/vci_agg.c new file mode 100644 index 0000000..30db545 --- /dev/null +++ b/contrib/vci/executor/vci_agg.c @@ -0,0 +1,2040 @@ +/*------------------------------------------------------------------------- + * + * vci_agg.c + * Routines to handle VCI Agg nodes + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/executor/vci_agg.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_aggregate.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_type.h" +#include "commands/explain.h" +#include "commands/explain_format.h" +#include "executor/execdebug.h" +#include "executor/executor.h" +#include "executor/nodeCustom.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/optimizer.h" +#include "optimizer/tlist.h" +#include "parser/parse_agg.h" +#include "parser/parse_coerce.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/expandeddatum.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/syscache.h" 
+#include "utils/tuplesort.h" + +#include "vci.h" +#include "vci_executor.h" +#include "vci_utils.h" +#include "vci_aggref.h" + +static void advance_transition_function(VciAggState *aggstate, + VciAggStatePerAgg peraggstate, + VciAggStatePerGroup pergroupstate); +static void advance_aggregates_vector(VciAggState *aggstate, VciAggStatePerGroup *entries, int max_slots); +static void find_cols(VciAggState *aggstate, Bitmapset **unaggregated); +static bool find_cols_walker(Node *node, Bitmapset **colnos); +static void build_hash_table(VciAggState *aggstate); +static void hash_create_memory(VciAggState *aggstate); +static List *find_hash_columns(VciAggState *aggstate); +static void lookup_hash_entry_vector(VciAggState *aggstate, + VciAggStatePerGroup *entries, int max_slots); +static TupleTableSlot *agg_retrieve_direct(VciAggState *aggstate); +static void agg_fill_hash_table_vector(VciAggState *aggstate); +static Datum GetAggInitVal(Datum textInitVal, Oid transtype); + +static void vci_agg_BeginCustomPlan_preprocess(VciAggState *aggstate); +static void vci_agg_BeginCustomPlan_postprocess_for_advance_aggref(VciAggState *aggstate); +static void vci_agg_BeginCustomPlan_postprocess_for_vp(VciAggState *aggstate, ExprContext *econtext); +static void vci_ExecFreeExprContext(PlanState *planstate); + +/** + * Initialize all aggregates for a new group of input values. + * + * When called, CurrentMemoryContext should be the per-query context. + * + * copied from src/backend/executor/nodeAgg.c + */ +void +vci_initialize_aggregates(VciAggState *aggstate, + VciAggStatePerAgg peragg, + VciAggStatePerGroup pergroup) +{ + for (int aggno = 0; aggno < aggstate->numaggs; aggno++) + { + VciAggStatePerAgg peraggstate = &peragg[aggno]; + VciAggStatePerGroup pergroupstate = &pergroup[aggno]; + + Assert(peraggstate->numSortCols == 0); + + /* + * (Re)set transValue to the initial value. 
+ * + * Note that when the initial value is pass-by-ref, we must copy it + * (into the aggcontext) since we will pfree the transValue later. + */ + if (peraggstate->initValueIsNull) + pergroupstate->transValue = peraggstate->initValue; + else + { + MemoryContext oldContext; + + oldContext = MemoryContextSwitchTo(aggstate->aggcontext); + pergroupstate->transValue = datumCopy(peraggstate->initValue, + peraggstate->transtypeByVal, + peraggstate->transtypeLen); + MemoryContextSwitchTo(oldContext); + } + pergroupstate->transValueIsNull = peraggstate->initValueIsNull; + + /* + * If the initial value for the transition state doesn't exist in the + * pg_aggregate table then we will let the first non-NULL value + * returned from the outer procNode become the initial value. (This is + * useful for aggregates like max() and min().) The noTransValue flag + * signals that we still need to do this. + */ + pergroupstate->noTransValue = peraggstate->initValueIsNull; + } +} + +/** + * Given new input value(s), advance the transition function of an aggregate. + * + * The new values (and null flags) have been preloaded into argument positions + * 1 and up in peraggstate->transfn_fcinfo, so that we needn't copy them again + * to pass to the transition function. We also expect that the static fields + * of the fcinfo are already initialized; that was done by ExecInitAgg(). + * + * It doesn't matter which memory context this is called in. + * + * copied from src/backend/executor/nodeAgg.c + */ +static void +advance_transition_function(VciAggState *aggstate, + VciAggStatePerAgg peraggstate, + VciAggStatePerGroup pergroupstate) +{ + FunctionCallInfo fcinfo = peraggstate->transfn_fcinfo; + MemoryContext oldContext; + Datum newVal; + + if (peraggstate->transfn.fn_strict) + { + /* + * For a strict transfn, nothing happens when there's a NULL input; we + * just keep the prior transValue. 
+ */ + int numTransInputs = peraggstate->numTransInputs; + + for (int i = 1; i <= numTransInputs; i++) + { + if (fcinfo->args[i].isnull) + return; + } + if (pergroupstate->noTransValue) + { + /* + * transValue has not been initialized. This is the first non-NULL + * input value. We use it as the initial value for transValue. (We + * already checked that the agg's input type is binary-compatible + * with its transtype, so straight copy here is OK.) + * + * We must copy the datum into aggcontext if it is pass-by-ref. We + * do not need to pfree the old transValue, since it's NULL. + */ + oldContext = MemoryContextSwitchTo(aggstate->aggcontext); + pergroupstate->transValue = datumCopy(fcinfo->args[1].value, + peraggstate->transtypeByVal, + peraggstate->transtypeLen); + pergroupstate->transValueIsNull = false; + pergroupstate->noTransValue = false; + MemoryContextSwitchTo(oldContext); + return; + } + if (pergroupstate->transValueIsNull) + { + /* + * Don't call a strict function with NULL inputs. Note it is + * possible to get here despite the above tests, if the transfn is + * strict *and* returned a NULL on a prior cycle. If that happens + * we will propagate the NULL all the way to the end. + */ + return; + } + } + + /* We run the transition functions in per-input-tuple memory context */ + oldContext = MemoryContextSwitchTo(aggstate->tmpcontext->ecxt_per_tuple_memory); + + /* set up aggstate->curperagg for AggGetAggref() */ + aggstate->pseudo_aggstate->curperagg = (AggStatePerAgg) peraggstate; /* @remark */ + + /* + * OK to call the transition function + */ + fcinfo->args[0].value = pergroupstate->transValue; + fcinfo->args[0].isnull = pergroupstate->transValueIsNull; + fcinfo->isnull = false; /* just in case transfn doesn't set it */ + + newVal = FunctionCallInvoke(fcinfo); + + aggstate->pseudo_aggstate->curperagg = NULL; + + /* + * If pass-by-ref datatype, must copy the new value into aggcontext and + * pfree the prior transValue. 
But if transfn returned a pointer to its + * first input, we don't need to do anything. + */ + if (!peraggstate->transtypeByVal && + DatumGetPointer(newVal) != DatumGetPointer(pergroupstate->transValue)) + { + if (!fcinfo->isnull) + { + MemoryContextSwitchTo(aggstate->aggcontext); + newVal = datumCopy(newVal, + peraggstate->transtypeByVal, + peraggstate->transtypeLen); + } + else + { + /* + * Ensure that VciAggStatePerGroup->transValue ends up being 0, so + * callers can safely compare newValue/oldValue without having to + * check their respective nullness. + */ + newVal = (Datum) 0; + } + if (!pergroupstate->transValueIsNull) + pfree(DatumGetPointer(pergroupstate->transValue)); + } + + pergroupstate->transValue = newVal; + pergroupstate->transValueIsNull = fcinfo->isnull; + + MemoryContextSwitchTo(oldContext); +} + +/** + * Perform aggregation processing for 1 input + * + * @param[in,out] aggstate VCI Agg State + * @param[in,out] pergroup Pointer to the VciAggStatePerGroup struct holding the Transition data + */ +void +vci_advance_aggregates(VciAggState *aggstate, VciAggStatePerGroup pergroup) +{ + for (int aggno = 0; aggno < aggstate->numaggs; aggno++) + { + VciAggStatePerAgg peraggstate = &aggstate->peragg[aggno]; + VciAggStatePerGroup pergroupstate = &pergroup[aggno]; + int numTransInputs = peraggstate->numTransInputs; + TupleTableSlot *slot; + + /* Evaluate the current input expressions for this aggregate */ + slot = VciExecProject(peraggstate->evalproj); + + Assert(peraggstate->numSortCols == 0); + + { + /* We can apply the transition function immediately */ + FunctionCallInfo fcinfo = peraggstate->transfn_fcinfo; + + /* Load values into fcinfo */ + /* Start from 1, since the 0th arg will be the transition value */ + Assert(slot->tts_nvalid >= numTransInputs); + for (int i = 0; i < numTransInputs; i++) + { + fcinfo->args[i + 1].value = slot->tts_values[i]; + fcinfo->args[i + 1].isnull = slot->tts_isnull[i]; + } + + advance_transition_function(aggstate, 
peraggstate, pergroupstate); + } + } +} + +/** + * Perform aggregation processing for 1 vector + * + * @param[in,out] aggstate VCI Agg State + * @param[in,out] entries Pointer to VciAggHashEntry struct holding a pair of hash key and Transition data + * @param[in] max_slots Number of vector rows + */ +static void +advance_aggregates_vector(VciAggState *aggstate, VciAggStatePerGroup *entries, int max_slots) +{ + aggstate->tmpcontext->ecxt_outertuple = NULL; + + for (int aggno = 0; aggno < aggstate->numaggs; aggno++) + { + VciAggStatePerAgg peraggstate = &aggstate->peragg[aggno]; + + /* + * slot_getsomeattrs() is not required + */ + Assert(peraggstate->advance_aggref != NULL); + peraggstate->advance_aggref(aggstate, aggno, entries, max_slots); + } +} + +/** + * Compute the final value of one aggregate. + * + * The finalfunction will be run, and the result delivered, in the + * output-tuple context; caller's CurrentMemoryContext does not matter. + * + * copied from src/backend/executor/nodeAgg.c + */ +void +vci_finalize_aggregate(VciAggState *aggstate, + VciAggStatePerAgg peraggstate, + VciAggStatePerGroup pergroupstate, + Datum *resultVal, bool *resultIsNull) +{ + LOCAL_FCINFO(fcinfo, FUNC_MAX_ARGS); + bool anynull = false; + MemoryContext oldContext; + int i; + + oldContext = MemoryContextSwitchTo(aggstate->vci.css.ss.ps.ps_ExprContext->ecxt_per_tuple_memory); + + /* + * Evaluate any direct arguments. We do this even if there's no finalfn + * (which is unlikely anyway), so that side-effects happen as expected. + * The direct arguments go into arg positions 1 and up, leaving position 0 + * for the transition state value. + */ + i = 1; + + /* + * Apply the agg's finalfn if one is provided, else return transValue. 
+ */ + if (OidIsValid(peraggstate->finalfn_oid)) + { + int numFinalArgs = peraggstate->numFinalArgs; + + /* set up aggstate->curperagg for AggGetAggref() */ + aggstate->pseudo_aggstate->curperagg = (AggStatePerAgg) peraggstate; /* @remark */ + + InitFunctionCallInfoData(*fcinfo, &(peraggstate->finalfn), + numFinalArgs, + peraggstate->aggCollation, + (Node *) aggstate->pseudo_aggstate, NULL); + + /* Fill in the transition state value */ + fcinfo->args[0].value = + MakeExpandedObjectReadOnly(pergroupstate->transValue, + pergroupstate->transValueIsNull, + peraggstate->transtypeLen); + fcinfo->args[0].isnull = pergroupstate->transValueIsNull; + anynull |= pergroupstate->transValueIsNull; + + /* Fill any remaining argument positions with nulls */ + for (; i < numFinalArgs; i++) + { + fcinfo->args[i].value = (Datum) 0; + fcinfo->args[i].isnull = true; + anynull = true; + } + + if (fcinfo->flinfo->fn_strict && anynull) + { + /* don't call a strict function with NULL inputs */ + *resultVal = (Datum) 0; + *resultIsNull = true; + } + else + { + Datum result; + + result = FunctionCallInvoke(fcinfo); + *resultIsNull = fcinfo->isnull; + *resultVal = MakeExpandedObjectReadOnly(result, + fcinfo->isnull, + peraggstate->resulttypeLen); + } + aggstate->pseudo_aggstate->curperagg = NULL; + } + else + { + *resultVal = + MakeExpandedObjectReadOnly(pergroupstate->transValue, + pergroupstate->transValueIsNull, + peraggstate->transtypeLen); + *resultIsNull = pergroupstate->transValueIsNull; + } + + MemoryContextSwitchTo(oldContext); +} + +/** + * find_cols + * Construct a bitmapset of the column numbers of un-aggregated Vars + * appearing in our targetlist and qual (HAVING clause) + * + * copied from src/backend/executor/nodeAgg.c + */ +static void +find_cols(VciAggState *aggstate, Bitmapset **unaggregated) +{ + VciAgg *node = (VciAgg *) aggstate->vci.css.ss.ps.plan; + Bitmapset *colnos; + + colnos = NULL; + (void) find_cols_walker((Node *) node->vci.cscan.scan.plan.targetlist, + 
&colnos); + (void) find_cols_walker((Node *) node->vci.cscan.scan.plan.qual, + &colnos); + + *unaggregated = colnos; +} + +static bool +find_cols_walker(Node *node, Bitmapset **colnos) +{ + if (node == NULL) + return false; + + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + /* setrefs.c should have set the varno to OUTER_VAR */ + Assert(var->varno == OUTER_VAR); + Assert(var->varlevelsup == 0); + *colnos = bms_add_member(*colnos, var->varattno); + return false; + } + + if (IsA(node, Aggref)) /* do not descend into aggregate exprs */ + return false; + + return expression_tree_walker(node, find_cols_walker, colnos); +} + +/** + * Initialize the hash table to empty. + * + * The hash table always lives in the aggcontext memory context. + * + * copied from src/backend/executor/nodeAgg.c + */ +static void +build_hash_table(VciAggState *aggstate) +{ + VciAgg *node = (VciAgg *) aggstate->vci.css.ss.ps.plan; + MemoryContext metacxt = aggstate->hash_metacxt; + MemoryContext tuplescxt = aggstate->hash_tuplescxt; + MemoryContext tmpcxt = aggstate->tmpcontext->ecxt_per_tuple_memory; + Size additionalsize; + + Assert(node->aggstrategy == AGG_HASHED || + node->aggstrategy == AGG_MIXED); + + Assert(node->numGroups > 0); + + additionalsize = aggstate->numaggs * sizeof(VciAggStatePerGroupData); + + aggstate->hashtable = BuildTupleHashTable(&aggstate->vci.css.ss.ps, + aggstate->hashslot->tts_tupleDescriptor, + NULL, + node->numCols, + node->grpColIdx, + aggstate->eqfuncoids, + aggstate->hashfunctions, + node->grpCollations, + node->numGroups, + additionalsize, + metacxt, + tuplescxt, + tmpcxt, + false); +} + +/** + * Create a list of the tuple columns that actually need to be stored in + * hashtable entries. The incoming tuples from the child plan node will + * contain grouping columns, other columns referenced in our targetlist and + * qual, columns used to compute the aggregate functions, and perhaps just + * junk columns we don't use at all. 
Only columns of the first two types + * need to be stored in the hashtable, and getting rid of the others can + * make the table entries significantly smaller. To avoid messing up Var + * numbering, we keep the same tuple descriptor for hashtable entries as the + * incoming tuples have, but set unwanted columns to NULL in the tuples that + * go into the table. + * + * To eliminate duplicates, we build a bitmapset of the needed columns, then + * convert it to an integer list (cheaper to scan at runtime). The list is + * in decreasing order so that the first entry is the largest; + * lookup_hash_entry depends on this to use slot_getsomeattrs correctly. + * Note that the list is preserved over ExecReScanAgg, so we allocate it in + * the per-query context (unlike the hash table itself). + * + * Note: at present, searching the tlist/qual is not really necessary since + * the parser should disallow any unaggregated references to ungrouped + * columns. However, the search will be needed when we add support for + * SQL99 semantics that allow use of "functionally dependent" columns that + * haven't been explicitly grouped by. + * + * copied from src/backend/executor/nodeAgg.c + */ +static List * +find_hash_columns(VciAggState *aggstate) +{ + VciAgg *node = (VciAgg *) aggstate->vci.css.ss.ps.plan; + Bitmapset *colnos; + List *collist; + int i; + + /* Find Vars that will be needed in tlist and qual */ + find_cols(aggstate, &colnos); + /* Add in all the grouping columns */ + for (i = 0; i < node->numCols; i++) + colnos = bms_add_member(colnos, node->grpColIdx[i]); + /* Convert to list, using lcons so largest element ends up first */ + collist = NIL; + i = -1; + while ((i = bms_next_member(colnos, i)) >= 0) + collist = lcons_int(i, collist); + bms_free(colnos); + + return collist; +} + +/* + * Create memory contexts used for hash aggregation. 
+ * + * copied from src/backend/executor/nodeAgg.c + */ +static void +hash_create_memory(VciAggState *aggstate) +{ + Size maxBlockSize = ALLOCSET_DEFAULT_MAXSIZE; + +#if 0 + /* + * The hashcontext's per-tuple memory will be used for byref transition + * values and returned by AggCheckCallContext(). + */ + aggstate->hashcontext = CreateWorkExprContext(aggstate->ss.ps.state); +#endif + + /* + * The meta context will be used for the bucket array of + * TupleHashEntryData (or arrays, in the case of grouping sets). As the + * hash table grows, the bucket array will double in size and the old one + * will be freed, so an AllocSet is appropriate. For large bucket arrays, + * the large allocation path will be used, so it's not worth worrying + * about wasting space due to power-of-two allocations. + */ + aggstate->hash_metacxt = AllocSetContextCreate(/* aggstate->ss.ps.state->es_query_cxt, */ + aggstate->vci.css.ss.ps.state->es_query_cxt, + "HashAgg meta context", + ALLOCSET_DEFAULT_SIZES); + + /* + * The hash entries themselves, which include the grouping key + * (firstTuple) and pergroup data, are stored in the table context. The + * bump allocator can be used because the entries are not freed until the + * entire hash table is reset. The bump allocator is faster for + * allocations and avoids wasting space on the chunk header or + * power-of-two allocations. + * + * Like CreateWorkExprContext(), use smaller sizings for smaller work_mem, + * to avoid large jumps in memory usage. + */ + + /* + * Like CreateWorkExprContext(), use smaller sizings for smaller work_mem, + * to avoid large jumps in memory usage. 
+ */ + maxBlockSize = pg_prevpower2_size_t(work_mem * (Size) 1024 / 16); + + /* But no bigger than ALLOCSET_DEFAULT_MAXSIZE */ + maxBlockSize = Min(maxBlockSize, ALLOCSET_DEFAULT_MAXSIZE); + + /* and no smaller than ALLOCSET_DEFAULT_INITSIZE */ + maxBlockSize = Max(maxBlockSize, ALLOCSET_DEFAULT_INITSIZE); + + aggstate->hash_tuplescxt = BumpContextCreate(/*aggstate->ss.ps.state->es_query_cxt,*/ + aggstate->vci.css.ss.ps.state->es_query_cxt, + "HashAgg hashed tuples", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + maxBlockSize); + +} + +static void +lookup_hash_entry_vector(VciAggState *aggstate, + VciAggStatePerGroup *entries, int max_slots) +{ + VciScanState *scanstate = (VciScanState *) outerPlanState(aggstate); + TupleTableSlot *hashslot = aggstate->hashslot; + uint16 *skip_list; + + Assert(scanstate->vci.css.ss.ps.type == T_CustomScanState); + + skip_list = vci_CSGetSkipFromVirtualTuples(scanstate->vector_set); + + /* Clear the tuple */ + ExecClearTuple(hashslot); + + /* + * Fill all the columns of the virtual tuple with nulls + */ + MemSet(hashslot->tts_values, 0, + hashslot->tts_tupleDescriptor->natts * sizeof(Datum)); + memset(hashslot->tts_isnull, true, + hashslot->tts_tupleDescriptor->natts * sizeof(bool)); + + for (int slot_index = skip_list[0]; slot_index < max_slots; slot_index += skip_list[slot_index + 1] + 1) + { + TupleHashEntry entry; + bool isnew; + VciAggStatePerGroup pergroup; + + ExecClearTuple(hashslot); + memset(hashslot->tts_isnull, true, + hashslot->tts_tupleDescriptor->natts * sizeof(bool)); + + for (int i = 0; i < aggstate->num_hash_needed; i++) + { + int varNumber = aggstate->hash_needed[i] - 1; + + hashslot->tts_values[varNumber] = aggstate->hash_input_values[i][slot_index]; + hashslot->tts_isnull[varNumber] = aggstate->hash_input_isnull[i][slot_index]; + } + ExecStoreVirtualTuple(hashslot); + + /* find or create the hashtable entry using the filtered tuple */ + entry = LookupTupleHashEntry(aggstate->hashtable, + hashslot, + 
&isnew, + NULL); + + pergroup = (VciAggStatePerGroup) TupleHashEntryGetAdditional(aggstate->hashtable, entry); + + if (isnew && aggstate->numaggs) + { + /* initialize aggregates for new tuple group */ + vci_initialize_aggregates(aggstate, aggstate->peragg, pergroup); + } + + entries[slot_index] = pergroup; + } +} + +/** + * ExecAgg for non-hashed case + * + * copied from src/backend/executor/nodeAgg.c + */ +static TupleTableSlot * +agg_retrieve_direct(VciAggState *aggstate) +{ + VciAgg *node = (VciAgg *) aggstate->vci.css.ss.ps.plan; + PlanState *outerPlan; + ExprContext *econtext; + ExprContext *tmpcontext; + Datum *aggvalues; + bool *aggnulls; + VciAggStatePerAgg peragg; + VciAggStatePerGroup pergroup; + TupleTableSlot *outerslot; + TupleTableSlot *firstSlot; + + /* + * get state info from node + */ + outerPlan = outerPlanState(aggstate); + /* econtext is the per-output-tuple expression context */ + econtext = aggstate->vci.css.ss.ps.ps_ExprContext; + aggvalues = econtext->ecxt_aggvalues; + aggnulls = econtext->ecxt_aggnulls; + /* tmpcontext is the per-input-tuple expression context */ + tmpcontext = aggstate->tmpcontext; + peragg = aggstate->peragg; + pergroup = aggstate->pergroup; + firstSlot = aggstate->vci.css.ss.ss_ScanTupleSlot; + + /* + * We loop retrieving groups until we find one matching + * aggstate->ss.ps.qual + */ + while (!aggstate->agg_done) + { + /* + * If we don't already have the first tuple of the new group, fetch it + * from the outer plan. + */ + if (aggstate->grp_firstTuple == NULL) + { + outerslot = ExecProcNode(outerPlan); + if (!TupIsNull(outerslot)) + { + /* + * Make a copy of the first input tuple; we will use this for + * comparisons (in group mode) and for projection. 
+ */ + aggstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot); + } + else + { + /* outer plan produced no tuples at all */ + aggstate->agg_done = true; + /* If we are grouping, we should produce no tuples too */ + if (node->aggstrategy != AGG_PLAIN) + return NULL; + } + } + + /* + * Clear the per-output-tuple context for each group, as well as + * aggcontext (which contains any pass-by-ref transvalues of the old + * group). We also clear any child contexts of the aggcontext; some + * aggregate functions store working state in such contexts. + * + * We use ReScanExprContext not just ResetExprContext because we want + * any registered shutdown callbacks to be called. That allows + * aggregate functions to ensure they've cleaned up any non-memory + * resources. + */ + ReScanExprContext(econtext); + + MemoryContextReset(aggstate->aggcontext); + + /* + * Initialize working state for a new input tuple group + */ + vci_initialize_aggregates(aggstate, peragg, pergroup); + + if (aggstate->grp_firstTuple != NULL) + { + /* + * Store the copied first input tuple in the tuple table slot + * reserved for it. The tuple will be deleted when it is cleared + * from the slot. + */ + ExecForceStoreHeapTuple(aggstate->grp_firstTuple, + firstSlot, + true); + aggstate->grp_firstTuple = NULL; /* don't keep two pointers */ + + /* set up for first advance_aggregates call */ + tmpcontext->ecxt_outertuple = firstSlot; + + /* + * Process each outer-plan tuple, and then fetch the next one, + * until we exhaust the outer plan or cross a group boundary. 
+ */ + for (;;) + { + vci_advance_aggregates(aggstate, pergroup); + + /* Reset per-input-tuple context after each tuple */ + ResetExprContext(tmpcontext); + + outerslot = ExecProcNode(outerPlan); + if (TupIsNull(outerslot)) + { + /* no more outer-plan tuples available */ + aggstate->agg_done = true; + break; + } + /* set up for next advance_aggregates call */ + tmpcontext->ecxt_outertuple = outerslot; + + /* + * If we are grouping, check whether we've crossed a group + * boundary. + */ + if (node->aggstrategy == AGG_SORTED) + { + tmpcontext->ecxt_innertuple = firstSlot; + if (!ExecQual(aggstate->eqfunctions[0], + tmpcontext)) + { + /* + * Save the first input tuple of the next group. + */ + aggstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot); + break; + } + } + } + } + + /* + * Use the representative input tuple for any references to + * non-aggregated input columns in aggregate direct args, the node + * qual, and the tlist. (If we are not grouping, and there are no + * input rows at all, we will come here with an empty firstSlot ... + * but if not grouping, there can't be any references to + * non-aggregated input columns, so no problem.) + */ + econtext->ecxt_outertuple = firstSlot; + + /* + * Done scanning input tuple group. Finalize each aggregate + * calculation, and stash results in the per-output-tuple context. + */ + for (int aggno = 0; aggno < aggstate->numaggs; aggno++) + { + VciAggStatePerAgg peraggstate = &peragg[aggno]; + VciAggStatePerGroup pergroupstate = &pergroup[aggno]; + + Assert(peraggstate->numSortCols == 0); + + vci_finalize_aggregate(aggstate, peraggstate, pergroupstate, + &aggvalues[aggno], &aggnulls[aggno]); + } + + /* + * Check the qual (HAVING clause); if the group does not match, ignore + * it and loop back to try to process another group. + */ + if (ExecQual(aggstate->vci.css.ss.ps.qual, econtext)) + { + /* + * Form and return a projection tuple using the aggregate results + * and the representative input tuple. 
+ */ + TupleTableSlot *result; + + result = VciExecProject(aggstate->vps_ProjInfo); + + return result; + } + else + InstrCountFiltered1(aggstate, 1); + } + + /* No more groups */ + return NULL; +} + +/** + * When Hashed aggregation is selected, tuples are received from lower nodes, + * constructs a has table, and aggregate them. However, processing is performed in vector units. + * + * @param[in,out] aggstate VCI Agg State + */ +void +vci_agg_fill_hash_table(VciAggState *aggstate) +{ + agg_fill_hash_table_vector(aggstate); +} + +static void +agg_fill_hash_table_vector(VciAggState *aggstate) +{ + ExprContext *tmpcontext; + VciScanState *scanstate = (VciScanState *) outerPlanState(aggstate); + + Assert(scanstate->vci.css.ss.ps.type == T_CustomScanState); + + /* + * get state info from node + */ + /* tmpcontext is the per-input-tuple expression context */ + tmpcontext = aggstate->tmpcontext; + + /* + * Process each outer-plan tuple, and then fetch the next one, until we + * exhaust the outer plan. + */ + for (;;) + { + int max_slots; + VciAggStatePerGroup entries[VCI_MAX_FETCHING_ROWS]; + + /* fetch VCI_MAX_FETCHING_ROWS rows from column store */ + max_slots = VciExecProcScanVector(scanstate); + + if (max_slots == 0) + break; + + tmpcontext->ecxt_outertuple = NULL; /* safety */ + + lookup_hash_entry_vector(aggstate, entries, max_slots); + + /* Advance the aggregates */ + advance_aggregates_vector(aggstate, entries, max_slots); + + /* Reset per-input-tuple context after each tuple */ + ResetExprContext(tmpcontext); + + /* Vector loading is complete */ + vci_finish_vector_set_from_column_store(scanstate); + } + + aggstate->table_filled = true; + /* Initialize to walk the hash table */ + ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter); +} + +/** + * Retrieve 1 tuple at a time from the hash table + * + * @param[in,out] aggstate VCI Agg State + * @return Resulting output tuple + * + * @note This function is used after executing vci_agg_fill_hash_table(). 
+ */ +TupleTableSlot * +vci_agg_retrieve_hash_table(VciAggState *aggstate) +{ + ExprContext *econtext; + Datum *aggvalues; + bool *aggnulls; + VciAggStatePerAgg peragg; + VciAggStatePerGroup pergroup; + TupleHashEntry entry; + TupleTableSlot *firstSlot; + + /* + * get state info from node + */ + /* econtext is the per-output-tuple expression context */ + econtext = aggstate->vci.css.ss.ps.ps_ExprContext; + aggvalues = econtext->ecxt_aggvalues; + aggnulls = econtext->ecxt_aggnulls; + peragg = aggstate->peragg; + firstSlot = aggstate->vci.css.ss.ss_ScanTupleSlot; + + /* + * We loop retrieving groups until we find one satisfying + * aggstate->ss.ps.qual + */ + while (!aggstate->agg_done) + { + + /* + * Find the next entry in the hash table + */ + entry = vci_agg_find_group_from_hash_table(aggstate); + if (entry == NULL) + { + /* No more entries in hashtable, so done */ + aggstate->agg_done = true; + return NULL; + } + + /* + * Clear the per-output-tuple context for each group + * + * We intentionally don't use ReScanExprContext here; if any aggs have + * registered shutdown callbacks, they mustn't be called yet, since we + * might not be done with that agg. + */ + ResetExprContext(econtext); + + /* + * Store the copied first input tuple in the tuple table slot reserved + * for it, so that it can be used in ExecProject. + */ + ExecForceStoreMinimalTuple(entry->firstTuple, + firstSlot, + false); + + pergroup = (VciAggStatePerGroup) TupleHashEntryGetAdditional(aggstate->hashtable, entry); + + /* + * Finalize each aggregate calculation, and stash results in the + * per-output-tuple context. 
+ */ + for (int aggno = 0; aggno < aggstate->numaggs; aggno++) + { + VciAggStatePerAgg peraggstate = &peragg[aggno]; + VciAggStatePerGroup pergroupstate = &pergroup[aggno]; + + Assert(peraggstate->numSortCols == 0); + vci_finalize_aggregate(aggstate, peraggstate, pergroupstate, + &aggvalues[aggno], &aggnulls[aggno]); + } + + /* + * Use the representative input tuple for any references to + * non-aggregated input columns in the qual and tlist. + */ + econtext->ecxt_outertuple = firstSlot; + + /* + * Check the qual (HAVING clause); if the group does not match, ignore + * it and loop back to try to process another group. + */ + if (ExecQual(aggstate->vci.css.ss.ps.qual, econtext)) + { + /* + * Form and return a projection tuple using the aggregate results + * and the representative input tuple. + */ + TupleTableSlot *result; + + result = VciExecProject(aggstate->vps_ProjInfo); + + return result; + } + else + InstrCountFiltered1(aggstate, 1); + } + + /* No more groups */ + return NULL; +} + +/** + * Retrive only 1 entry from hash table + * + * @param[in,out] aggstate VCI Agg State + * @return One VciAggHashEntry retrieved from hash table + */ +TupleHashEntry +vci_agg_find_group_from_hash_table(VciAggState *aggstate) +{ + while (!aggstate->agg_done) + { + return (TupleHashEntry) ScanTupleHashTable(aggstate->hashtable, &aggstate->hashiter); + } + + /* No more groups */ + return NULL; +} + +static Datum +GetAggInitVal(Datum textInitVal, Oid transtype) +{ + Oid typinput, + typioparam; + char *strInitVal; + Datum initVal; + + getTypeInputInfo(transtype, &typinput, &typioparam); + strInitVal = TextDatumGetCString(textInitVal); + initVal = OidInputFunctionCall(typinput, strInitVal, + typioparam, -1); + pfree(strInitVal); + return initVal; +} + +/*********************************************************************** + * API exposed to aggregate functions + ***********************************************************************/ + +/* + * The following function is a callback 
function from AggState, + * but there is no need to directly maintain it in VCI Agg. + * + * - AggCheckCallContext - test if a SQL function is being called as an aggregate + * - AggGetAggref - allow an aggregate support function to get its Aggref + * - AggGetTempMemoryContext - fetch short-term memory context for aggregates + * - AggRegisterCallback - register a cleanup callback for an aggregate + */ + +/* ---------------- + * VciAgg information + * ---------------- + */ +static Node * +vci_agg_CreateCustomScanState(CustomScan *cscan) +{ + VciAgg *vagg = (VciAgg *) cscan; + VciAggState *vas = palloc0_object(VciAggState); + + vas->vci.css.ss.ps.type = T_CustomScanState; + vas->vci.css.ss.ps.plan = (Plan *) vagg; + + vas->vci.css.flags = cscan->flags; + + switch (vagg->aggstrategy) + { + case AGG_HASHED: + vas->vci.css.methods = &vci_hashagg_exec_methods; + break; + + case AGG_SORTED: + vas->vci.css.methods = &vci_groupagg_exec_methods; + break; + + case AGG_PLAIN: + vas->vci.css.methods = &vci_agg_exec_methods; + break; + + default: + break; + } + + vas->aggs = NIL; + vas->numaggs = 0; + vas->eqfunctions = NULL; + vas->hashfunctions = NULL; + vas->peragg = NULL; + vas->agg_done = false; + vas->pergroup = NULL; + vas->grp_firstTuple = NULL; + vas->hashtable = NULL; + + return (Node *) vas; +} + +/** + * ExecCustomPlan callback called from CustomPlanState of VCI Agg + */ +static TupleTableSlot * +vci_agg_ExecCustomPlan(CustomScanState *node) +{ + VciAggState *aggstate; + + aggstate = (VciAggState *) node; + + /* + * Exit if nothing left to do. (We must do the ps_TupFromTlist check + * first, because in some cases agg_done gets set before we emit the final + * aggregate tuple, and we have to finish running SRFs for it.) 
+ */
+	if (aggstate->agg_done)
+		return NULL;
+
+	Assert(IsA(node->ss.ps.plan, CustomScan));
+
+	/*
+	 * Dispatch based on strategy.  Both branches return, so no trailing
+	 * statement is needed (the previous unreachable "return NULL;" here has
+	 * been removed).
+	 */
+	if (((VciAgg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
+	{
+		/* build the hash table on first call, then iterate over it */
+		if (!aggstate->table_filled)
+			vci_agg_fill_hash_table(aggstate);
+		return vci_agg_retrieve_hash_table(aggstate);
+	}
+	else
+		return agg_retrieve_direct(aggstate);
+}
+
+/**
+ * Copy the contents of VCI Agg State to pseudo Agg state
+ */
+static void
+copy_into_pseudo_aggstate(AggState *pseudo_aggstate, VciAggState *aggstate)
+{
+	pseudo_aggstate->ss.ps.plan = aggstate->vci.css.ss.ps.plan;
+	pseudo_aggstate->ss.ps.state = aggstate->vci.css.ss.ps.state;
+	pseudo_aggstate->ss.ps.instrument = aggstate->vci.css.ss.ps.instrument;
+	pseudo_aggstate->ss.ps.qual = aggstate->vci.css.ss.ps.qual;
+	pseudo_aggstate->ss.ps.lefttree = aggstate->vci.css.ss.ps.lefttree;
+	pseudo_aggstate->ss.ps.righttree = aggstate->vci.css.ss.ps.righttree;
+	pseudo_aggstate->ss.ps.initPlan = aggstate->vci.css.ss.ps.initPlan;
+	pseudo_aggstate->ss.ps.subPlan = aggstate->vci.css.ss.ps.subPlan;
+	pseudo_aggstate->ss.ps.chgParam = aggstate->vci.css.ss.ps.chgParam;
+	pseudo_aggstate->ss.ps.ps_ResultTupleSlot = aggstate->vci.css.ss.ps.ps_ResultTupleSlot;
+	pseudo_aggstate->ss.ps.ps_ExprContext = aggstate->vci.css.ss.ps.ps_ExprContext;
+	pseudo_aggstate->ss.ps.ps_ProjInfo = aggstate->vci.css.ss.ps.ps_ProjInfo;
+
+	pseudo_aggstate->ss.ss_currentRelation = aggstate->vci.css.ss.ss_currentRelation;
+	pseudo_aggstate->ss.ss_currentScanDesc = aggstate->vci.css.ss.ss_currentScanDesc;
+	pseudo_aggstate->ss.ss_ScanTupleSlot = aggstate->vci.css.ss.ss_ScanTupleSlot;
+
+	pseudo_aggstate->aggs = aggstate->aggs;
+	pseudo_aggstate->numaggs = aggstate->numaggs;
+	pseudo_aggstate->phases[0].eqfunctions = aggstate->eqfunctions;
+	pseudo_aggstate->perhash->hashfunctions = aggstate->hashfunctions;
+	pseudo_aggstate->peragg = (AggStatePerAgg) aggstate->peragg;
+	pseudo_aggstate->tmpcontext =
aggstate->tmpcontext; + pseudo_aggstate->curperagg = NULL; + pseudo_aggstate->agg_done = aggstate->agg_done; + pseudo_aggstate->pergroups = (AggStatePerGroup *) &aggstate->pergroup; + pseudo_aggstate->grp_firstTuple = aggstate->grp_firstTuple; + pseudo_aggstate->perhash->hashtable = aggstate->hashtable; + pseudo_aggstate->perhash->hashslot = NULL; + pseudo_aggstate->table_filled = aggstate->table_filled; + pseudo_aggstate->perhash->hashiter = aggstate->hashiter; +} + +/** + * BeginCustomPlan callback called from CustomPlan of VCI Agg + */ +static void +vci_agg_BeginCustomPlan(CustomScanState *node, EState *estate, int eflags) +{ + VciAgg *agg; + VciAggState *aggstate; + VciAggStatePerAgg peragg; + Plan *outerPlan; + ExprContext *econtext; + int max_aggno; + int numaggs; + ListCell *l; + vci_initexpr_t initexpr; + TupleDesc scanDesc; + bool use_hashing; + + agg = (VciAgg *) node->ss.ps.plan; + + use_hashing = (agg->aggstrategy == AGG_HASHED || agg->aggstrategy == AGG_MIXED); + + /* check for unsupported flags */ + Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); + + /* + * create state structure + */ + aggstate = (VciAggState *) node; + + aggstate->vci.css.ss.ps.state = estate; + + if (vci_get_vci_plan_type(outerPlan(agg)) == VCI_CUSTOMPLAN_SCAN) + { + aggstate->enable_vp = true; + } + + vci_agg_BeginCustomPlan_preprocess(aggstate); + + /* + * Create expression contexts. We need three or more, one for + * per-input-tuple processing, one for per-output-tuple processing, and + * one for each grouping set. The per-tuple memory context of the + * per-grouping-set ExprContexts (aggcontexts) replaces the standalone + * memory context formerly used to hold transition values. We cheat a + * little by using ExecAssignExprContext() to build all of them. 
+ * + * NOTE: the details of what is stored in aggcontexts and what is stored + * in the regular per-query memory context are driven by a simple + * decision: we want to reset the aggcontext at group boundaries (if not + * hashing) and in ExecReScanAgg to recover no-longer-wanted space. + */ + ExecAssignExprContext(estate, &aggstate->vci.css.ss.ps); + aggstate->tmpcontext = aggstate->vci.css.ss.ps.ps_ExprContext; + ExecAssignExprContext(estate, &aggstate->vci.css.ss.ps); + + aggstate->pseudo_aggstate->aggcontexts[0] = aggstate->vci.css.ss.ps.ps_ExprContext; + ExecAssignExprContext(estate, &aggstate->vci.css.ss.ps); + + aggstate->aggcontext = + AllocSetContextCreate(CurrentMemoryContext, + "VciAggContext", + ALLOCSET_DEFAULT_SIZES); + + if (use_hashing) + hash_create_memory(aggstate); + + /* + * The timing of ExecInitExpr() for targetlist and qual, and the timing of + * ExecInitNode() for outer node are reversed from the original. + * + * This is because we want VciScanState to exist when Var is evaluated. + */ + + /* + * initialize child nodes + * + * If we are doing a hashed aggregation then the child plan does not need + * to handle REWIND efficiently; see ExecReScanAgg. 
+ */ + if (agg->aggstrategy == AGG_HASHED) + eflags &= ~EXEC_FLAG_REWIND; + outerPlan = outerPlan(node->ss.ps.plan); + + outerPlanState(aggstate) = ExecInitNode(outerPlan, estate, eflags); + + /* + * tuple table initialization + */ + aggstate->vci.css.ss.ps.outerops = + ExecGetResultSlotOps(outerPlanState(&aggstate->vci.css.ss), + &aggstate->vci.css.ss.ps.outeropsfixed); + aggstate->vci.css.ss.ps.outeropsset = true; + + ExecCreateScanSlotFromOuterPlan(estate, &aggstate->vci.css.ss, + aggstate->vci.css.ss.ps.outerops); + scanDesc = aggstate->vci.css.ss.ss_ScanTupleSlot->tts_tupleDescriptor; + + ExecInitResultTupleSlotTL(&aggstate->vci.css.ss.ps, &TTSOpsVirtual); + aggstate->hashslot = ExecInitExtraTupleSlot(estate, scanDesc, &TTSOpsMinimalTuple); + + /* + * In the case of hashed aggregation, Var in targetlist and qual are read + * using outer tuple, but targetlist under Aggref will fetch column store. + * (However if outer is other than VCI Scan, read from outer tuple) + * + * Sorted aggregation and plain aggregation are all read from outer tuple. + */ + if (agg->aggstrategy == AGG_HASHED) + initexpr = VCI_INIT_EXPR_FETCHING_COLUMN_STORE; + else + initexpr = VCI_INIT_EXPR_NORMAL; + + /* + * initialize child expressions + * + * Note: ExecInitExpr finds Aggrefs for us, and also checks that no aggs + * contain other agg calls in their arguments. This would make no sense + * under SQL semantics anyway (and it's forbidden by the spec). Because + * that is true, we don't need to worry about evaluating the aggs in any + * particular order. + */ + aggstate->vci.css.ss.ps.qual = + VciExecInitQual(agg->vci.cscan.scan.plan.qual, (PlanState *) aggstate, initexpr); + + /* + * Initialize projection info. 
+ */ + aggstate->vps_ProjInfo = + VciExecBuildProjectionInfo(aggstate->vci.css.ss.ps.plan->targetlist, + aggstate->vci.css.ss.ps.ps_ExprContext, + aggstate->vci.css.ss.ps.ps_ResultTupleSlot, + &aggstate->vci.css.ss.ps, + NULL); + + /* + * get the count of aggregates in targetlist and quals + */ + max_aggno = -1; + foreach(l, aggstate->aggs) + { + Aggref *aggref = (Aggref *) lfirst(l); + + max_aggno = Max(max_aggno, aggref->aggno); + } + aggstate->numaggs = numaggs = max_aggno + 1; + + /* + * If we are grouping, precompute fmgr lookup data for inner loop. We need + * both equality and hashing functions to do it by hashing, but only + * equality if not hashing. + */ + if (agg->numCols > 0) + { + if (agg->aggstrategy == AGG_HASHED) + execTuplesHashPrepare(agg->numCols, + agg->grpOperators, + &aggstate->eqfuncoids, + &aggstate->hashfunctions); + else + { + aggstate->eqfunctions = + palloc0_array(ExprState *, 1); + aggstate->eqfunctions[0] = + execTuplesMatchPrepare(scanDesc, + agg->numCols, + agg->grpColIdx, + agg->grpOperators, + agg->grpCollations, + (PlanState *) aggstate); + } + } + + /* + * Set up aggregate-result storage in the output expr context, and also + * allocate my private per-agg working storage + */ + econtext = aggstate->vci.css.ss.ps.ps_ExprContext; + econtext->ecxt_aggvalues = palloc0_array(Datum, numaggs); + econtext->ecxt_aggnulls = palloc0_array(bool, numaggs); + + peragg = palloc0_array(VciAggStatePerAggData, numaggs); + aggstate->peragg = peragg; + + if (agg->aggstrategy == AGG_HASHED) + { + int i; + List *hash_need; + ListCell *lc; + + /* Compute the columns we actually need to hash on */ + hash_need = find_hash_columns(aggstate); + aggstate->num_hash_needed = list_length(hash_need); + aggstate->hash_needed = palloc_array(int, aggstate->num_hash_needed); + + Assert(aggstate->num_hash_needed > 0); + + i = 0; + foreach(lc, hash_need) + { + aggstate->hash_needed[i++] = lfirst_int(lc); + + if (aggstate->last_hash_column < lfirst_int(lc)) + 
aggstate->last_hash_column = lfirst_int(lc); + } + + { + VciScanState *scanstate = (VciScanState *) outerPlanState(aggstate); + + Assert(scanstate->vci.css.ss.ps.type == T_CustomScanState); + + aggstate->hash_input_values = palloc_array(Datum *, aggstate->num_hash_needed); + aggstate->hash_input_isnull = palloc_array(bool *, aggstate->num_hash_needed); + + for (i = 0; i < aggstate->num_hash_needed; i++) + { + int varNumber = aggstate->hash_needed[i] - 1; + + aggstate->hash_input_values[i] = + scanstate->result_values[varNumber]; + + aggstate->hash_input_isnull[i] = + scanstate->result_isnull[varNumber]; + } + } + + build_hash_table(aggstate); + aggstate->table_filled = false; + } + else + { + VciAggStatePerGroup pergroup; + + pergroup = palloc0_array(VciAggStatePerGroupData, numaggs); + aggstate->pergroup = pergroup; + } + + /* + * Perform lookups of aggregate function info, and initialize the + * unchanging fields of the per-agg data. We also detect duplicate + * aggregates (for example, "SELECT sum(x) ... HAVING sum(x) > 0"). When + * duplicates are detected, we only make an AggStatePerAgg struct for the + * first one. The clones are simply pointed at the same result entry by + * giving them duplicate aggno values. + */ + foreach(l, aggstate->aggs) + { + Aggref *aggref = lfirst(l); + VciAggStatePerAgg peraggstate; + Oid inputTypes[FUNC_MAX_ARGS]; + int numArguments; + int numDirectArgs; + int numInputs; + int numSortCols; + int numDistinctCols; + List *sortlist; + HeapTuple aggTuple; + Form_pg_aggregate aggform; + Oid aggtranstype; + AclResult aclresult; + Oid transfn_oid, + finalfn_oid; + Expr *transfnexpr, + *finalfnexpr; + Datum textInitVal; + + /* Planner should have assigned aggregate to correct level */ + Assert(aggref->agglevelsup == 0); + + peraggstate = &peragg[aggref->aggno]; + + /* Check if we initialized the state for this aggregate already. 
*/ + if (peraggstate->aggref != NULL) + continue; + + peraggstate->aggref = aggref; + peraggstate->sortstate = NULL; + + /* Fetch the pg_aggregate row */ + aggTuple = SearchSysCache1(AGGFNOID, + ObjectIdGetDatum(aggref->aggfnoid)); + if (!HeapTupleIsValid(aggTuple)) + elog(ERROR, "cache lookup failed for aggregate %u", + aggref->aggfnoid); + aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple); + + /* Check permission to call aggregate function */ + aclresult = object_aclcheck(ProcedureRelationId, aggref->aggfnoid, GetUserId(), + ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_AGGREGATE, + get_func_name(aggref->aggfnoid)); + InvokeFunctionExecuteHook(aggref->aggfnoid); + + peraggstate->transfn_oid = transfn_oid = aggform->aggtransfn; + peraggstate->finalfn_oid = finalfn_oid = aggform->aggfinalfn; + + /* Check that aggregate owner has permission to call component fns */ + { + HeapTuple procTuple; + Oid aggOwner; + + procTuple = SearchSysCache1(PROCOID, + ObjectIdGetDatum(aggref->aggfnoid)); + if (!HeapTupleIsValid(procTuple)) + elog(ERROR, "cache lookup failed for function %u", + aggref->aggfnoid); + aggOwner = ((Form_pg_proc) GETSTRUCT(procTuple))->proowner; + ReleaseSysCache(procTuple); + + aclresult = object_aclcheck(ProcedureRelationId, transfn_oid, aggOwner, + ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_AGGREGATE, + get_func_name(transfn_oid)); + InvokeFunctionExecuteHook(transfn_oid); + if (OidIsValid(finalfn_oid)) + { + aclresult = object_aclcheck(ProcedureRelationId, finalfn_oid, aggOwner, + ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_AGGREGATE, + get_func_name(finalfn_oid)); + InvokeFunctionExecuteHook(finalfn_oid); + } + } + + /* + * Get actual datatypes of the (nominal) aggregate inputs. These + * could be different from the agg's declared input types, when the + * agg accepts ANY or a polymorphic type. 
+ */
+		numArguments = get_aggregate_argtypes(aggref, inputTypes);
+		peraggstate->numArguments = numArguments;
+
+		/* Count the "direct" arguments, if any */
+		numDirectArgs = list_length(aggref->aggdirectargs);
+
+		/* Count the number of aggregated input columns */
+		numInputs = list_length(aggref->args);
+		peraggstate->numInputs = numInputs;
+
+		Assert(!AGGKIND_IS_ORDERED_SET(aggref->aggkind));
+		Assert(!aggform->aggfinalextra);
+
+		peraggstate->numTransInputs = numArguments;
+		peraggstate->numFinalArgs = numDirectArgs + 1;
+
+		/* resolve actual type of transition state, if polymorphic */
+		aggtranstype = resolve_aggregate_transtype(aggref->aggfnoid,
+												   aggform->aggtranstype,
+												   inputTypes,
+												   numArguments);
+
+		/* build expression trees using actual argument & result types */
+		build_aggregate_transfn_expr(inputTypes,
+									 numArguments,
+									 numDirectArgs,
+									 aggref->aggvariadic,
+									 aggtranstype,
+									 aggref->inputcollid,
+									 transfn_oid,
+									 InvalidOid,	/* invtrans is not needed
+													 * here */
+									 &transfnexpr,
+									 NULL);
+
+		/* set up infrastructure for calling the transfn */
+		fmgr_info(transfn_oid, &peraggstate->transfn);
+		fmgr_info_set_expr((Node *) transfnexpr, &peraggstate->transfn);
+
+		if (OidIsValid(finalfn_oid))
+		{
+			/*
+			 * Pass this aggregate's own numFinalArgs.  The previous code
+			 * read peragg->numFinalArgs, i.e. element 0 of the peragg array,
+			 * which belongs to a different aggregate whenever aggno > 0 (and
+			 * may not even be initialized yet at this point).
+			 */
+			build_aggregate_finalfn_expr(inputTypes,
+										 peraggstate->numFinalArgs,
+										 aggtranstype,
+										 aggref->aggtype,
+										 aggref->inputcollid,
+										 finalfn_oid,
+										 &finalfnexpr);
+
+			/* set up infrastructure for calling the finalfn */
+			fmgr_info(finalfn_oid, &peraggstate->finalfn);
+			fmgr_info_set_expr((Node *) finalfnexpr, &peraggstate->finalfn);
+		}
+
+		peraggstate->aggCollation = aggref->inputcollid;
+
+		peraggstate->transfn_fcinfo =
+			(FunctionCallInfo) palloc(SizeForFunctionCallInfo(peraggstate->numTransInputs + 1));
+		InitFunctionCallInfoData(*peraggstate->transfn_fcinfo,
+								 &peraggstate->transfn,
+								 peraggstate->numTransInputs + 1,
+								 peraggstate->aggCollation,
+								 (void *) aggstate->pseudo_aggstate, NULL);
+
+		/* get info about relevant datatypes */
+		
get_typlenbyval(aggref->aggtype, + &peraggstate->resulttypeLen, + &peraggstate->resulttypeByVal); + get_typlenbyval(aggtranstype, + &peraggstate->transtypeLen, + &peraggstate->transtypeByVal); + + /* + * initval is potentially null, so don't try to access it as a struct + * field. Must do it the hard way with SysCacheGetAttr. + */ + textInitVal = SysCacheGetAttr(AGGFNOID, aggTuple, + Anum_pg_aggregate_agginitval, + &peraggstate->initValueIsNull); + + if (peraggstate->initValueIsNull) + peraggstate->initValue = (Datum) 0; + else + peraggstate->initValue = GetAggInitVal(textInitVal, + aggtranstype); + + /* + * If the transfn is strict and the initval is NULL, make sure input + * type and transtype are the same (or at least binary-compatible), so + * that it's OK to use the first aggregated input value as the initial + * transValue. This should have been checked at agg definition time, + * but we must check again in case the transfn's strictness property + * has been changed. + */ + if (peraggstate->transfn.fn_strict && peraggstate->initValueIsNull) + { + if (numArguments <= numDirectArgs || + !IsBinaryCoercible(inputTypes[numDirectArgs], aggtranstype)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate %u needs to have compatible input type and transition type", + aggref->aggfnoid))); + } + + /* + * Get a tupledesc corresponding to the aggregated inputs (including + * sort expressions) of the agg. 
+ */ + peraggstate->evaldesc = ExecTypeFromTL(aggref->args); + + /* Create slot we're going to do argument evaluation in */ + peraggstate->evalslot = ExecInitExtraTupleSlot(estate, peraggstate->evaldesc, &TTSOpsMinimalTuple); + + /* Set up projection info for evaluation */ + peraggstate->evalproj = VciExecBuildProjectionInfo(aggref->args, + aggstate->tmpcontext, + peraggstate->evalslot, + &aggstate->vci.css.ss.ps, + NULL); + + Assert(!AGGKIND_IS_ORDERED_SET(aggref->aggkind)); + Assert(!aggref->aggdistinct); + + sortlist = aggref->aggorder; + numSortCols = list_length(sortlist); + numDistinctCols = 0; + + peraggstate->numSortCols = numSortCols; + peraggstate->numDistinctCols = numDistinctCols; + + Assert(numSortCols == 0); + + Assert(aggref->aggdistinct == NIL); + + ReleaseSysCache(aggTuple); + } + + if (agg->aggstrategy == AGG_HASHED) + { + vci_agg_BeginCustomPlan_postprocess_for_advance_aggref(aggstate); + vci_agg_BeginCustomPlan_postprocess_for_vp(aggstate, econtext); + } + + /* Recopy dummy AggState */ + copy_into_pseudo_aggstate(aggstate->pseudo_aggstate, aggstate); +} + +/** + * Create and connect a pseudo Agg state to VCI Agg State + */ +static void +vci_agg_BeginCustomPlan_preprocess(VciAggState *aggstate) +{ + AggState *pseudo_aggstate; + + /* + * Create dummy AggState + * + * aggregation function registered in pg_proc system catalog checks if + * Execution Plan State Node is AggState or WindowsAggState. VciAggState + * is not considered an AggState because it is a CustomPlanState. The + * dummy AggState is used to fool Execution Plan State Node seen by + * aggregation function. + * + * Since it is necessary to set aggstate->pseudo_aggstate at the stage + * when the AggrefState is initialized, insert it before + * vci_agg_BeginCustomPlan. 
+ */
+	pseudo_aggstate = makeNode(AggState);
+
+	/* only one (no grouping sets allowed) */
+	pseudo_aggstate->aggcontexts =
+		palloc0_array(ExprContext *, 1);
+	ExecAssignExprContext(aggstate->vci.css.ss.ps.state, &aggstate->vci.css.ss.ps);
+	pseudo_aggstate->aggcontexts[0] = aggstate->vci.css.ss.ps.ps_ExprContext;
+	pseudo_aggstate->curaggcontext = pseudo_aggstate->aggcontexts[0];
+
+	pseudo_aggstate->phases = palloc0_object(AggStatePerPhaseData);
+	pseudo_aggstate->phases[0].grouped_cols = NULL;
+	pseudo_aggstate->phases[0].sortnode = NULL;
+	pseudo_aggstate->phases[0].numsets = 0;
+	pseudo_aggstate->phases[0].gset_lengths = NULL;
+
+	pseudo_aggstate->perhash = palloc0_object(AggStatePerHashData);
+
+	copy_into_pseudo_aggstate(pseudo_aggstate, aggstate);
+	aggstate->pseudo_aggstate = pseudo_aggstate;
+}
+
+/**
+ * Replace transition function for each Aggref with an optimized version.
+ */
+static void
+vci_agg_BeginCustomPlan_postprocess_for_advance_aggref(VciAggState *aggstate)
+{
+	for (int aggno = 0; aggno < aggstate->numaggs; aggno++)
+	{
+		VciAggStatePerAgg peraggstate = &aggstate->peragg[aggno];
+
+		peraggstate->advance_aggref = VciGetSpecialAdvanceAggrefFunc(peraggstate);
+	}
+}
+
+/**
+ * Create vector processing context from targetlist to execute vector processing
+ */
+static void
+vci_agg_BeginCustomPlan_postprocess_for_vp(VciAggState *aggstate, ExprContext *econtext)
+{
+	VciScanState *scanstate = vci_search_scan_state(&aggstate->vci);
+	uint16	   *skip_list;
+
+	skip_list = vci_CSGetSkipAddrFromVirtualTuples(scanstate->vector_set);
+
+	for (int aggno = 0; aggno < aggstate->numaggs; aggno++)
+	{
+		VciAggStatePerAgg peraggstate = &aggstate->peragg[aggno];
+		VciProjectionInfo *proj = peraggstate->evalproj;
+
+		if (proj->pi_tle_array_len > 0)
+			proj->pi_vp_tle_array = palloc0_array(VciVPContext *, proj->pi_tle_array_len);
+
+		for (int i = 0; i < proj->pi_tle_array_len; i++)
+		{
+			TargetEntry *tle;
+
+			tle = (TargetEntry *) proj->pi_tle_array[i];
+
+			
proj->pi_vp_tle_array[i] = + VciBuildVectorProcessing(tle->expr, (PlanState *) aggstate, + econtext, skip_list); + } + } +} + +/* ---------------- + * vci_ExecFreeExprContext + * + * A plan node's ExprContext should be freed explicitly during executor + * shutdown because there may be shutdown callbacks to call. (Other resources + * made by the above routines, such as projection info, don't need to be freed + * explicitly because they're just memory in the per-query memory context.) + */ +static void +vci_ExecFreeExprContext(PlanState *planstate) +{ + /* + * Per above discussion, don't actually delete the ExprContext. We do + * unlink it from the plan node, though. + */ + planstate->ps_ExprContext = NULL; +} + +/** + * EndCustomPlan callback called from CustomPlanState of VCI Agg + */ +static void +vci_agg_EndCustomPlan(CustomScanState *node) +{ + VciAggState *aggstate; + PlanState *outerPlan; + + aggstate = (VciAggState *) node; + + node = (CustomScanState *) aggstate; + + /* And ensure any agg shutdown callbacks have been called */ + ReScanExprContext(aggstate->vci.css.ss.ps.ps_ExprContext); + + /* + * Free both the expr contexts. 
+ */ + vci_ExecFreeExprContext(&aggstate->vci.css.ss.ps); + node->ss.ps.ps_ExprContext = aggstate->tmpcontext; + vci_ExecFreeExprContext(&aggstate->vci.css.ss.ps); + + MemoryContextDelete(aggstate->aggcontext); + + /* Release hash tables too */ + if (aggstate->hash_metacxt != NULL) + { + MemoryContextDelete(aggstate->hash_metacxt); + aggstate->hash_metacxt = NULL; + } + if (aggstate->hash_tuplescxt != NULL) + { + MemoryContextDelete(aggstate->hash_tuplescxt); + aggstate->hash_tuplescxt = NULL; + } + + outerPlan = outerPlanState(node); + + ExecEndNode(outerPlan); +} + +/** + * ReScanCustomPlan callback called from CustomPlanState of VCI Agg + */ +static void +vci_agg_ReScanCustomPlan(CustomScanState *node) +{ + VciAggState *aggstate; + ExprContext *econtext; + + aggstate = (VciAggState *) node; + + econtext = aggstate->vci.css.ss.ps.ps_ExprContext; + + aggstate->agg_done = false; + + if (((VciAgg *) aggstate->vci.css.ss.ps.plan)->aggstrategy == AGG_HASHED) + { + /* + * In the hashed case, if we haven't yet built the hash table then we + * can just return; nothing done yet, so nothing to undo. If subnode's + * chgParam is not NULL then it will be re-scanned by ExecProcNode, + * else no reason to re-scan it at all. + */ + if (!aggstate->table_filled) + return; + + /* + * If we do have the hash table and the subplan does not have any + * parameter changes, then we can just rescan the existing hash table; + * no need to build it again. 
+ */ + if (aggstate->vci.css.ss.ps.lefttree->chgParam == NULL) + { + ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter); + return; + } + } + + /* We don't need to ReScanExprContext here; ExecReScan already did it */ + + /* Release first tuple of group, if we have made a copy */ + if (aggstate->grp_firstTuple != NULL) + { + heap_freetuple(aggstate->grp_firstTuple); + aggstate->grp_firstTuple = NULL; + } + + /* Forget current agg values */ + MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * aggstate->numaggs); + MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * aggstate->numaggs); + + /* + * Release all temp storage. Note that with AGG_HASHED, the hash table is + * allocated in a sub-context of the hash_metacxt. We're going to rebuild + * the hash table from scratch, so we need to use MemoryContextReset() to + * avoid leaking the old hash table's memory context header. + */ + MemoryContextReset(aggstate->aggcontext); + + Assert(IsA(aggstate->vci.css.ss.ps.plan, CustomScan)); + + if (((VciAgg *) aggstate->vci.css.ss.ps.plan)->aggstrategy == AGG_HASHED) + { + MemoryContextReset(aggstate->hash_metacxt); + MemoryContextReset(aggstate->hash_tuplescxt); + + /* Rebuild an empty hash table */ + build_hash_table(aggstate); + aggstate->table_filled = false; + } + else + { + /* + * Reset the per-group state (in particular, mark transvalues null) + */ + MemSet(aggstate->pergroup, 0, + sizeof(VciAggStatePerGroupData) * aggstate->numaggs); + } + + /* + * if chgParam of subnode is not null then plan will be re-scanned by + * first ExecProcNode. 
+ */ + if (aggstate->vci.css.ss.ps.lefttree->chgParam == NULL) + ExecReScan(aggstate->vci.css.ss.ps.lefttree); +} + +/* LCOV_EXCL_START */ + +/** + * MarkPosCustomPlan callback called by CustomPlanState of VCI Agg + */ +static void +vci_agg_MarkPosCustomPlan(CustomScanState *node) +{ + elog(PANIC, "VCI Agg does not support MarkPosCustomPlan call convention"); +} + +/** + * RestrPosCustomPlan callback called by CustomPlanState of VCI Agg + */ +static void +vci_agg_RestrPosCustomPlan(CustomScanState *node) +{ + elog(PANIC, "VCI Agg does not support RestrPosCustomPlan call convention"); +} + +/* LCOV_EXCL_STOP */ + +/** + * ExplainCustomPlan callback called by CustomPlanState of VCI Agg + */ +static void +vci_agg_ExplainCustomPlan(CustomScanState *cpstate, + List *ancestors, + ExplainState *es) +{ + VciAgg *agg = (VciAgg *) cpstate->ss.ps.plan; + + if (agg->numCols > 0) + { + /* The key columns refer to the tlist of the child plan */ + ancestors = lcons(&cpstate->ss.ps, ancestors); + + ExplainPropertySortGroupKeys(outerPlanState(&cpstate->ss.ps), "Group Key", + agg->numCols, agg->grpColIdx, + ancestors, es); + ancestors = list_delete_first(ancestors); + } +} + +/** + * CopyCustomPlan callback called by CustomPlan of VCI Agg + */ +static CustomScan * +vci_agg_CopyCustomPlan(const CustomScan *_from) +{ + const VciAgg *from = (const VciAgg *) _from; + VciAgg *newnode = (VciAgg *) newNode(sizeof(VciAgg), _from->scan.plan.type); + int numCols; + + vci_copy_plan(&newnode->vci, &from->vci); + + newnode->aggstrategy = from->aggstrategy; + + numCols = from->numCols; + newnode->numCols = numCols; + if (numCols > 0) + { + newnode->grpColIdx = palloc_array(AttrNumber, numCols); + newnode->grpOperators = palloc_array(Oid, numCols); + newnode->grpCollations = palloc_array(Oid, numCols); + for (int i = 0; i < numCols; i++) + { + newnode->grpColIdx[i] = from->grpColIdx[i]; + newnode->grpOperators[i] = from->grpOperators[i]; + newnode->grpCollations[i] = from->grpCollations[i]; + } + 
} + newnode->numGroups = from->numGroups; + + ((Node *) newnode)->type = nodeTag((Node *) from); + + return &newnode->vci.cscan; +} + +CustomScanMethods vci_agg_scan_methods = { + "VCI Aggregate", + vci_agg_CreateCustomScanState, + vci_agg_CopyCustomPlan +}; + +CustomScanMethods vci_hashagg_scan_methods = { + "VCI HashAggregate", + vci_agg_CreateCustomScanState, + vci_agg_CopyCustomPlan +}; + +CustomScanMethods vci_groupagg_scan_methods = { + "VCI GroupAggregate", + vci_agg_CreateCustomScanState, + vci_agg_CopyCustomPlan +}; + +/** + * VCI Agg's CustomPlanMethods callbacks + */ +CustomExecMethods vci_agg_exec_methods = { + "VCI Aggregate", + vci_agg_BeginCustomPlan, + vci_agg_ExecCustomPlan, + vci_agg_EndCustomPlan, + vci_agg_ReScanCustomPlan, + vci_agg_MarkPosCustomPlan, + vci_agg_RestrPosCustomPlan, + NULL, + NULL, + NULL, + NULL, + NULL, + vci_agg_ExplainCustomPlan, + NULL, + NULL +}; + +/** + * VCI Agg's CustomPlanMethods callbacks + */ +CustomExecMethods vci_hashagg_exec_methods = { + "VCI HashAggregate", + vci_agg_BeginCustomPlan, + vci_agg_ExecCustomPlan, + vci_agg_EndCustomPlan, + vci_agg_ReScanCustomPlan, + vci_agg_MarkPosCustomPlan, + vci_agg_RestrPosCustomPlan, + NULL, + NULL, + NULL, + NULL, + NULL, + vci_agg_ExplainCustomPlan, + NULL, + NULL +}; + +/** + * VCI Agg's CustomPlanMethods callbacks + */ +CustomExecMethods vci_groupagg_exec_methods = { + "VCI GroupAggregate", + vci_agg_BeginCustomPlan, + vci_agg_ExecCustomPlan, + vci_agg_EndCustomPlan, + vci_agg_ReScanCustomPlan, + vci_agg_MarkPosCustomPlan, + vci_agg_RestrPosCustomPlan, + NULL, + NULL, + NULL, + NULL, + NULL, + vci_agg_ExplainCustomPlan, + NULL, + NULL +}; diff --git a/contrib/vci/executor/vci_aggmergetranstype.c b/contrib/vci/executor/vci_aggmergetranstype.c new file mode 100644 index 0000000..61a00a6b --- /dev/null +++ b/contrib/vci/executor/vci_aggmergetranstype.c @@ -0,0 +1,133 @@ +/*------------------------------------------------------------------------- + * + * 
vci_aggmergetranstype.c + * Parallel merge utility routines to merge between aggregate function's + * internal transition (state) data. + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/executor/vci_aggmergetranstype.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup.h" +#include "access/htup_details.h" +#include "catalog/pg_aggregate.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_type.h" +#include "datatype/timestamp.h" +#include "lib/stringinfo.h" +#include "libpq/pqformat.h" +#include "utils/acl.h" +#include "utils/array.h" +#include "utils/datum.h" +#include "utils/fmgroids.h" +#include "utils/fmgrprotos.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/numeric.h" +#include "utils/syscache.h" +#include "utils/timestamp.h" + +#include "vci.h" + +#include "vci_executor.h" + +#include "postgresql_copy.h" + +/** + * Determine if the given aggregation function is a type that can be supported by VCI + * + * @param[in] aggref Pointer to Aggref that holds the aggregate function to be determined + * @return true if supportable, false if not + */ +bool +vci_is_supported_aggregation(Aggref *aggref) +{ + int numInputs; + HeapTuple aggTuple; + Form_pg_aggregate aggform; + AclResult aclresult; + Oid transfn_oid; + Oid rettype; + Oid *argtypes; + int nargs; + bool ret = false; + + /* not UDF */ + if (FirstNormalObjectId <= aggref->aggfnoid) + { + elog(DEBUG1, "Aggref contains user-defined aggregation"); + return false; + } + + /* 0 or 1 input function */ + numInputs = list_length(aggref->args); + if (1 < numInputs) + { + elog(DEBUG1, "Aggref contains an aggregation with 2 or more arguments"); + return false; + } + + /* Fetch the pg_aggregate row */ + aggTuple = SearchSysCache1(AGGFNOID, + ObjectIdGetDatum(aggref->aggfnoid)); + if (!HeapTupleIsValid(aggTuple)) + elog(ERROR, "cache lookup failed 
for aggregate %u", + aggref->aggfnoid); + + aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple); + + aclresult = object_aclcheck(ProcedureRelationId, aggref->aggfnoid, GetUserId(), + ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_AGGREGATE, + get_func_name(aggref->aggfnoid)); + + transfn_oid = aggform->aggtransfn; + + /* Check that aggregate owner has permission to call component fns */ + + rettype = get_func_signature(transfn_oid, &argtypes, &nargs); + + if ((rettype != INTERNALOID) && + (nargs == 2) && (rettype == argtypes[0]) && (rettype == argtypes[1])) + { + ret = true; + } + else + { + switch (transfn_oid) + { + case F_FLOAT4_ACCUM: + case F_FLOAT8_ACCUM: + case F_INT8INC: + case F_NUMERIC_ACCUM: + case F_INT2_ACCUM: + case F_INT4_ACCUM: + case F_INT8_ACCUM: + case F_INT2_SUM: + case F_INT4_SUM: + case F_INT2_AVG_ACCUM: + case F_INT4_AVG_ACCUM: + case F_INT8_AVG_ACCUM: + case F_INT8INC_ANY: + case F_NUMERIC_AVG_ACCUM: + case F_INTERVAL_AVG_COMBINE: + ret = true; + break; + default: + break; + } + } + + if (!ret) + elog(DEBUG1, "Aggref contains unsupported aggregation function"); + + ReleaseSysCache(aggTuple); + + return ret; +} diff --git a/contrib/vci/executor/vci_aggref.c b/contrib/vci/executor/vci_aggref.c new file mode 100644 index 0000000..31d4067 --- /dev/null +++ b/contrib/vci/executor/vci_aggref.c @@ -0,0 +1,1287 @@ +/*------------------------------------------------------------------------- + * + * vci_aggref.c + * Routine to inline transition functions for speeding up aggregate functions + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/executor/vci_aggref.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "access/htup_details.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_aggregate.h" +#include "catalog/pg_proc.h" +#include "commands/explain.h" +#include 
"executor/execdebug.h" +#include "executor/executor.h" +#include "executor/nodeCustom.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "optimizer/tlist.h" +#include "parser/parse_agg.h" +#include "parser/parse_coerce.h" +#include "utils/acl.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/cash.h" +#include "utils/date.h" +#include "utils/datum.h" +#include "utils/fmgroids.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/syscache.h" +#include "utils/timestamp.h" +#include "utils/tuplesort.h" + +#include "vci.h" +#include "vci_executor.h" +#include "vci_aggref.h" + +#define VCI_USE_CMP_FUNC +#include "postgresql_copy.h" +#undef VCI_USE_CMP_FUNC + +#define VCI_TRANS_INPUTS_0 (0) +#define VCI_TRANS_INPUTS_1_SIMPLEVAR (1) +#define VCI_TRANS_INPUTS_1_EVALEXPR (2) + +/* + * Default pattern + */ +#define VCI_ADVANCE_AGGREF_FUNC aggref_0input_default +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_0 +#define VCI_TRANFN_OID 0 +#define VCI_TRANS_FN_STRICT peraggstate->transfn.fn_strict +#define VCI_TRANS_TYPE_BYVAL -1 +#define VCI_TRANS_USE_CURPERAGG +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_USE_CURPERAGG +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_default +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID 0 +#define VCI_TRANS_FN_STRICT peraggstate->transfn.fn_strict +#define VCI_TRANS_TYPE_BYVAL -1 +#define VCI_TRANS_USE_CURPERAGG +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_USE_CURPERAGG +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_default +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID 0 +#define 
VCI_TRANS_FN_STRICT peraggstate->transfn.fn_strict +#define VCI_TRANS_TYPE_BYVAL -1 +#define VCI_TRANS_USE_CURPERAGG +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_USE_CURPERAGG +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +/* + * Individual advance transition routine + */ +#define VCI_ADVANCE_AGGREF_FUNC aggref_0input_int8inc +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_0 +#define VCI_TRANFN_OID F_INT8INC +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_int8inc_any +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_INT8INC_ANY +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_float4_accum +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_FLOAT4_ACCUM +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 0 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_float4pl +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_FLOAT4PL +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define 
VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_float4larger +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_FLOAT4LARGER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_float4smaller +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_FLOAT4SMALLER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_float8pl +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_FLOAT8PL +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_int4larger +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_INT4LARGER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_int4smaller +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_INT4SMALLER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + 
+#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_cash_pl +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_CASH_PL +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_cashlarger +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_CASHLARGER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_cashsmaller +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_CASHSMALLER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_date_larger +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_DATE_LARGER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_date_smaller +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_DATE_SMALLER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG 
+#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_interval_pl +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_INTERVAL_PL +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 0 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_timestamp_smaller +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_TIMESTAMP_SMALLER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_timestamp_larger +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_TIMESTAMP_LARGER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_interval_smaller +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_INTERVAL_SMALLER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 0 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_interval_larger +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_INTERVAL_LARGER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 0 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef 
VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_time_larger +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_TIME_LARGER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_time_smaller +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_TIME_SMALLER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_timetz_larger +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_TIMETZ_LARGER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 0 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_timetz_smaller +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_TIMETZ_SMALLER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 0 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_int2_sum +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_INT2_SUM +#define VCI_TRANS_FN_STRICT 0 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include 
"vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_int4_sum +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_INT4_SUM +#define VCI_TRANS_FN_STRICT 0 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_int4and +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_INT4AND +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_int4or +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_INT4OR +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_int4_avg_accum +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_INT4_AVG_ACCUM +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 0 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_booland_statefunc +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_BOOLAND_STATEFUNC +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include 
"vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_boolor_statefunc +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_BOOLOR_STATEFUNC +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_int2and +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_INT2AND +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_int2or +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_INT2OR +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_int2_avg_accum +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_INT2_AVG_ACCUM +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 0 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_int2larger +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_INT2LARGER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include 
"vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_int2smaller +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_INT2SMALLER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_int8and +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_INT8AND +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_int8or +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_INT8OR +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_int8larger +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_INT8LARGER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_int8smaller +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_INT8SMALLER +#define VCI_TRANS_FN_STRICT 1 +#define 
VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_float8larger +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_FLOAT8LARGER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_float8smaller +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_FLOAT8SMALLER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_simple_var_float8_accum +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_SIMPLEVAR +#define VCI_TRANFN_OID F_FLOAT8_ACCUM +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 0 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +/* eval expr */ + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_int8inc_any +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_INT8INC_ANY +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_float4_accum +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR 
+#define VCI_TRANFN_OID F_FLOAT4_ACCUM +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 0 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_float4pl +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_FLOAT4PL +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_float4larger +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_FLOAT4LARGER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_float4smaller +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_FLOAT4SMALLER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_float8pl +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_FLOAT8PL +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_int4larger +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define 
VCI_TRANFN_OID F_INT4LARGER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_int4smaller +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_INT4SMALLER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_cash_pl +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_CASH_PL +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_cashlarger +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_CASHLARGER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_cashsmaller +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_CASHSMALLER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_date_larger +#define VCI_TRANS_INPUTS_ARG 
VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_DATE_LARGER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_date_smaller +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_DATE_SMALLER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_interval_pl +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_INTERVAL_PL +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 0 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_timestamp_smaller +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_TIMESTAMP_SMALLER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_timestamp_larger +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_TIMESTAMP_LARGER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC 
aggref_eval_expr_interval_smaller +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_INTERVAL_SMALLER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 0 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_interval_larger +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_INTERVAL_LARGER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 0 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_time_larger +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_TIME_LARGER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_time_smaller +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_TIME_SMALLER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_timetz_larger +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_TIMETZ_LARGER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 0 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC 
+ +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_timetz_smaller +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_TIMETZ_SMALLER +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 0 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_int2_sum +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_INT2_SUM +#define VCI_TRANS_FN_STRICT 0 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_int4_sum +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_INT4_SUM +#define VCI_TRANS_FN_STRICT 0 +#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_int4and +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_INT4AND +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_int4or +#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR +#define VCI_TRANFN_OID F_INT4OR +#define VCI_TRANS_FN_STRICT 1 +#define VCI_TRANS_TYPE_BYVAL 1 +#include "vci_aggref_impl.inc" +#undef VCI_TRANS_TYPE_BYVAL +#undef VCI_TRANS_FN_STRICT +#undef VCI_TRANFN_OID +#undef VCI_TRANS_INPUTS_ARG +#undef VCI_ADVANCE_AGGREF_FUNC + +#define 
VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_int4_avg_accum
#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR
#define VCI_TRANFN_OID F_INT4_AVG_ACCUM
#define VCI_TRANS_FN_STRICT 1
#define VCI_TRANS_TYPE_BYVAL 0
#include "vci_aggref_impl.inc"
#undef VCI_TRANS_TYPE_BYVAL
#undef VCI_TRANS_FN_STRICT
#undef VCI_TRANFN_OID
#undef VCI_TRANS_INPUTS_ARG
#undef VCI_ADVANCE_AGGREF_FUNC

/*
 * Each block below stamps out one specialized "advance aggref" routine in
 * template-include (X-macro) style: define the template parameters
 * (generated function name, transition-function OID, strictness, and
 * whether the transition type is passed by value), textually include the
 * shared implementation "vci_aggref_impl.inc", then undefine the
 * parameters so the next instantiation starts from a clean slate.
 *
 * For the int8/float8-based transition functions, by-value-ness follows
 * USE_FLOAT8_BYVAL, i.e. whether an 8-byte value fits in a Datum on this
 * platform.
 */

#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_booland_statefunc
#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR
#define VCI_TRANFN_OID F_BOOLAND_STATEFUNC
#define VCI_TRANS_FN_STRICT 1
#define VCI_TRANS_TYPE_BYVAL 1
#include "vci_aggref_impl.inc"
#undef VCI_TRANS_TYPE_BYVAL
#undef VCI_TRANS_FN_STRICT
#undef VCI_TRANFN_OID
#undef VCI_TRANS_INPUTS_ARG
#undef VCI_ADVANCE_AGGREF_FUNC

#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_boolor_statefunc
#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR
#define VCI_TRANFN_OID F_BOOLOR_STATEFUNC
#define VCI_TRANS_FN_STRICT 1
#define VCI_TRANS_TYPE_BYVAL 1
#include "vci_aggref_impl.inc"
#undef VCI_TRANS_TYPE_BYVAL
#undef VCI_TRANS_FN_STRICT
#undef VCI_TRANFN_OID
#undef VCI_TRANS_INPUTS_ARG
#undef VCI_ADVANCE_AGGREF_FUNC

#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_int2and
#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR
#define VCI_TRANFN_OID F_INT2AND
#define VCI_TRANS_FN_STRICT 1
#define VCI_TRANS_TYPE_BYVAL 1
#include "vci_aggref_impl.inc"
#undef VCI_TRANS_TYPE_BYVAL
#undef VCI_TRANS_FN_STRICT
#undef VCI_TRANFN_OID
#undef VCI_TRANS_INPUTS_ARG
#undef VCI_ADVANCE_AGGREF_FUNC

#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_int2or
#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR
#define VCI_TRANFN_OID F_INT2OR
#define VCI_TRANS_FN_STRICT 1
#define VCI_TRANS_TYPE_BYVAL 1
#include "vci_aggref_impl.inc"
#undef VCI_TRANS_TYPE_BYVAL
#undef VCI_TRANS_FN_STRICT
#undef VCI_TRANFN_OID
#undef VCI_TRANS_INPUTS_ARG
#undef VCI_ADVANCE_AGGREF_FUNC

#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_int2_avg_accum
#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR
#define VCI_TRANFN_OID F_INT2_AVG_ACCUM
#define VCI_TRANS_FN_STRICT 1
#define VCI_TRANS_TYPE_BYVAL 0
#include "vci_aggref_impl.inc"
#undef VCI_TRANS_TYPE_BYVAL
#undef VCI_TRANS_FN_STRICT
#undef VCI_TRANFN_OID
#undef VCI_TRANS_INPUTS_ARG
#undef VCI_ADVANCE_AGGREF_FUNC

#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_int2larger
#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR
#define VCI_TRANFN_OID F_INT2LARGER
#define VCI_TRANS_FN_STRICT 1
#define VCI_TRANS_TYPE_BYVAL 1
#include "vci_aggref_impl.inc"
#undef VCI_TRANS_TYPE_BYVAL
#undef VCI_TRANS_FN_STRICT
#undef VCI_TRANFN_OID
#undef VCI_TRANS_INPUTS_ARG
#undef VCI_ADVANCE_AGGREF_FUNC

#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_int2smaller
#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR
#define VCI_TRANFN_OID F_INT2SMALLER
#define VCI_TRANS_FN_STRICT 1
#define VCI_TRANS_TYPE_BYVAL 1
#include "vci_aggref_impl.inc"
#undef VCI_TRANS_TYPE_BYVAL
#undef VCI_TRANS_FN_STRICT
#undef VCI_TRANFN_OID
#undef VCI_TRANS_INPUTS_ARG
#undef VCI_ADVANCE_AGGREF_FUNC

#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_int8and
#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR
#define VCI_TRANFN_OID F_INT8AND
#define VCI_TRANS_FN_STRICT 1
#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL
#include "vci_aggref_impl.inc"
#undef VCI_TRANS_TYPE_BYVAL
#undef VCI_TRANS_FN_STRICT
#undef VCI_TRANFN_OID
#undef VCI_TRANS_INPUTS_ARG
#undef VCI_ADVANCE_AGGREF_FUNC

#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_int8or
#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR
#define VCI_TRANFN_OID F_INT8OR
#define VCI_TRANS_FN_STRICT 1
#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL
#include "vci_aggref_impl.inc"
#undef VCI_TRANS_TYPE_BYVAL
#undef VCI_TRANS_FN_STRICT
#undef VCI_TRANFN_OID
#undef VCI_TRANS_INPUTS_ARG
#undef VCI_ADVANCE_AGGREF_FUNC

#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_int8larger
#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR
#define VCI_TRANFN_OID F_INT8LARGER
#define VCI_TRANS_FN_STRICT 1
#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL
#include "vci_aggref_impl.inc"
#undef VCI_TRANS_TYPE_BYVAL
#undef VCI_TRANS_FN_STRICT
#undef VCI_TRANFN_OID
#undef VCI_TRANS_INPUTS_ARG
#undef VCI_ADVANCE_AGGREF_FUNC

#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_int8smaller
#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR
#define VCI_TRANFN_OID F_INT8SMALLER
#define VCI_TRANS_FN_STRICT 1
#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL
#include "vci_aggref_impl.inc"
#undef VCI_TRANS_TYPE_BYVAL
#undef VCI_TRANS_FN_STRICT
#undef VCI_TRANFN_OID
#undef VCI_TRANS_INPUTS_ARG
#undef VCI_ADVANCE_AGGREF_FUNC

#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_float8larger
#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR
#define VCI_TRANFN_OID F_FLOAT8LARGER
#define VCI_TRANS_FN_STRICT 1
#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL
#include "vci_aggref_impl.inc"
#undef VCI_TRANS_TYPE_BYVAL
#undef VCI_TRANS_FN_STRICT
#undef VCI_TRANFN_OID
#undef VCI_TRANS_INPUTS_ARG
#undef VCI_ADVANCE_AGGREF_FUNC

#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_float8smaller
#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR
#define VCI_TRANFN_OID F_FLOAT8SMALLER
#define VCI_TRANS_FN_STRICT 1
#define VCI_TRANS_TYPE_BYVAL USE_FLOAT8_BYVAL
#include "vci_aggref_impl.inc"
#undef VCI_TRANS_TYPE_BYVAL
#undef VCI_TRANS_FN_STRICT
#undef VCI_TRANFN_OID
#undef VCI_TRANS_INPUTS_ARG
#undef VCI_ADVANCE_AGGREF_FUNC

#define VCI_ADVANCE_AGGREF_FUNC aggref_eval_expr_float8_accum
#define VCI_TRANS_INPUTS_ARG VCI_TRANS_INPUTS_1_EVALEXPR
#define VCI_TRANFN_OID F_FLOAT8_ACCUM
#define VCI_TRANS_FN_STRICT 1
#define VCI_TRANS_TYPE_BYVAL 0
#include "vci_aggref_impl.inc"
#undef VCI_TRANS_TYPE_BYVAL
#undef VCI_TRANS_FN_STRICT
#undef VCI_TRANFN_OID
#undef
VCI_TRANS_INPUTS_ARG
#undef VCI_ADVANCE_AGGREF_FUNC

/*
 * Descriptor for one transition function that VCI can inline.
 *
 * fn_oid / fn_nargs / fn_strict / transtypeByVal are cross-checked against
 * the catalog-derived values in VciGetSpecialAdvanceAggrefFunc() before a
 * fast-path routine is handed out.
 */
typedef struct
{
	Oid			fn_oid;			/* transition function OID; bsearch key */
	short		fn_nargs;		/* expected argument count of transfn */
	bool		fn_strict;		/* expected strictness of transfn */
	bool		transtypeByVal; /* expected by-value-ness of transition type */
	bool		consumeMemory;	/* NOTE(review): semantics not visible in this
								 * file -- presumably "transfn allocates per
								 * call"; confirm against vci_aggref.c */
	bool		useCurPerAgg;	/* NOTE(review): semantics not visible here;
								 * confirm against vci_aggref.c */
	VciAdvanceAggref_Func simple_var_func;	/* fast path: input is one simple Var */
	VciAdvanceAggref_Func eval_expr_func;	/* fast path: input is one general expr */
} AggrefTransInfo;

/*
 * Hard-coded OIDs for timestamp_smaller/timestamp_larger.
 * NOTE(review): the table below also contains F_TIMESTAMP_SMALLER /
 * F_TIMESTAMP_LARGER entries commented as 1195/1196; in current catalogs
 * those macros resolve to 2035/2036, which would collide with these values
 * and break the ascending order -- verify against the target fmgroids.h.
 */
#define VCI_F_TIMESTAMP_SMALLER 2035
#define VCI_F_TIMESTAMP_LARGER 2036

static int compare_aggref_trans_info(const void *p1, const void *p2);
static AggrefTransInfo *search_aggref_trans_info(Oid oid);

/**
 * Show the inline expansion routine for each transition function
 *
 *
 * @note Array should be ordered in ascending fn_oid order
 */

#ifdef USE_FLOAT8_BYVAL
#define VCI_FLOAT8_TRANSTYPEBYVAL true
#else
#define VCI_FLOAT8_TRANSTYPEBYVAL false
#endif

/*
 * Keep this array sorted by fn_oid: search_aggref_trans_info() looks
 * entries up with bsearch() using compare_aggref_trans_info().
 */
static AggrefTransInfo function_table[] = {
	{F_FLOAT4PL, 2, true, true, false, false, aggref_simple_var_float4pl, aggref_eval_expr_float4pl}, /* 204 */
	{F_FLOAT4_ACCUM, 2, true, false, false, false, aggref_simple_var_float4_accum, aggref_eval_expr_float4_accum}, /* 208 */
	{F_FLOAT4LARGER, 2, true, true, false, false, aggref_simple_var_float4larger, aggref_eval_expr_float4larger}, /* 209 */
	{F_FLOAT4SMALLER, 2, true, true, false, false, aggref_simple_var_float4smaller, aggref_eval_expr_float4smaller}, /* 211 */
	{F_FLOAT8PL, 2, true, VCI_FLOAT8_TRANSTYPEBYVAL, false, false, aggref_simple_var_float8pl, aggref_eval_expr_float8pl}, /* 218 */
	{F_FLOAT8_ACCUM, 2, true, false, false, false, aggref_simple_var_float8_accum, aggref_eval_expr_float8_accum}, /* 222 */
	{F_FLOAT8LARGER, 2, true, VCI_FLOAT8_TRANSTYPEBYVAL, false, false, aggref_simple_var_float8larger, aggref_eval_expr_float8larger}, /* 223 */
	{F_FLOAT8SMALLER, 2, true, VCI_FLOAT8_TRANSTYPEBYVAL, false, false, aggref_simple_var_float8smaller, aggref_eval_expr_float8smaller}, /* 224 */
	{F_INT4LARGER, 2, true, true, false, false, aggref_simple_var_int4larger, aggref_eval_expr_int4larger}, /* 768 */
	{F_INT4SMALLER, 2, true, true, false, false, aggref_simple_var_int4smaller, aggref_eval_expr_int4smaller}, /* 769 */
	{F_INT2LARGER, 2, true, true, false, false, aggref_simple_var_int2larger, aggref_eval_expr_int2larger}, /* 770 */
	{F_INT2SMALLER, 2, true, true, false, false, aggref_simple_var_int2smaller, aggref_eval_expr_int2smaller}, /* 771 */
	{F_CASH_PL, 2, true, VCI_FLOAT8_TRANSTYPEBYVAL, false, false, aggref_simple_var_cash_pl, aggref_eval_expr_cash_pl}, /* 894 */
	{F_CASHLARGER, 2, true, VCI_FLOAT8_TRANSTYPEBYVAL, false, false, aggref_simple_var_cashlarger, aggref_eval_expr_cashlarger}, /* 898 */
	{F_CASHSMALLER, 2, true, VCI_FLOAT8_TRANSTYPEBYVAL, false, false, aggref_simple_var_cashsmaller, aggref_eval_expr_cashsmaller}, /* 899 */
	{F_DATE_LARGER, 2, true, true, false, false, aggref_simple_var_date_larger, aggref_eval_expr_date_larger}, /* 1138 */
	{F_DATE_SMALLER, 2, true, true, false, false, aggref_simple_var_date_smaller, aggref_eval_expr_date_smaller}, /* 1139 */
	{F_INTERVAL_PL, 2, true, false, true, false, aggref_simple_var_interval_pl, aggref_eval_expr_interval_pl}, /* 1169 */
	{F_TIMESTAMP_SMALLER, 2, true, VCI_FLOAT8_TRANSTYPEBYVAL, false, false, aggref_simple_var_timestamp_smaller, aggref_eval_expr_timestamp_smaller}, /* 1195 */
	{F_TIMESTAMP_LARGER, 2, true, VCI_FLOAT8_TRANSTYPEBYVAL, false, false, aggref_simple_var_timestamp_larger, aggref_eval_expr_timestamp_larger}, /* 1196 */
	{F_INTERVAL_SMALLER, 2, true, false, false, false, aggref_simple_var_interval_smaller, aggref_eval_expr_interval_smaller}, /* 1197 */
	{F_INTERVAL_LARGER, 2, true, false, false, false, aggref_simple_var_interval_larger, aggref_eval_expr_interval_larger}, /* 1198 */
	{F_INT8INC, 1, true, VCI_FLOAT8_TRANSTYPEBYVAL, false, false, aggref_0input_int8inc, NULL}, /* 1219 */
	{F_INT8LARGER, 2, true, VCI_FLOAT8_TRANSTYPEBYVAL, false, false, aggref_simple_var_int8larger, aggref_eval_expr_int8larger}, /* 1236 */
	{F_INT8SMALLER, 2, true, VCI_FLOAT8_TRANSTYPEBYVAL, false, false, aggref_simple_var_int8smaller, aggref_eval_expr_int8smaller}, /* 1237 */
	{F_TIME_LARGER, 2, true, VCI_FLOAT8_TRANSTYPEBYVAL, false, false, aggref_simple_var_time_larger, aggref_eval_expr_time_larger}, /* 1377 */
	{F_TIME_SMALLER, 2, true, VCI_FLOAT8_TRANSTYPEBYVAL, false, false, aggref_simple_var_time_smaller, aggref_eval_expr_time_smaller}, /* 1378 */
	{F_TIMETZ_LARGER, 2, true, false, false, false, aggref_simple_var_timetz_larger, aggref_eval_expr_timetz_larger}, /* 1379 */
	{F_TIMETZ_SMALLER, 2, true, false, false, false, aggref_simple_var_timetz_smaller, aggref_eval_expr_timetz_smaller}, /* 1380 */
	{F_INT2_SUM, 2, false, VCI_FLOAT8_TRANSTYPEBYVAL, false, false, aggref_simple_var_int2_sum, aggref_eval_expr_int2_sum}, /* 1840 */
	{F_INT4_SUM, 2, false, VCI_FLOAT8_TRANSTYPEBYVAL, false, false, aggref_simple_var_int4_sum, aggref_eval_expr_int4_sum}, /* 1841 */
	{F_INT2AND, 2, true, true, false, false, aggref_simple_var_int2and, aggref_eval_expr_int2and}, /* 1892 */
	{F_INT2OR, 2, true, true, false, false, aggref_simple_var_int2or, aggref_eval_expr_int2or}, /* 1893 */
	{F_INT4AND, 2, true, true, false, false, aggref_simple_var_int4and, aggref_eval_expr_int4and}, /* 1898 */
	{F_INT4OR, 2, true, true, false, false, aggref_simple_var_int4or, aggref_eval_expr_int4or}, /* 1899 */
	{F_INT8AND, 2, true, VCI_FLOAT8_TRANSTYPEBYVAL, false, false, aggref_simple_var_int8and, aggref_eval_expr_int8and}, /* 1904 */
	{F_INT8OR, 2, true, VCI_FLOAT8_TRANSTYPEBYVAL, false, false, aggref_simple_var_int8or, aggref_eval_expr_int8or}, /* 1905 */
	{F_INT2_AVG_ACCUM, 2, true, false, false, false, aggref_simple_var_int2_avg_accum, aggref_eval_expr_int2_avg_accum}, /* 1962 */
	{F_INT4_AVG_ACCUM, 2, true, false, false, false, aggref_simple_var_int4_avg_accum, aggref_eval_expr_int4_avg_accum}, /* 1963 */
	{VCI_F_TIMESTAMP_SMALLER, 2, true, VCI_FLOAT8_TRANSTYPEBYVAL, false, false, aggref_simple_var_timestamp_smaller, aggref_eval_expr_timestamp_smaller}, /* 2035 */
	{VCI_F_TIMESTAMP_LARGER, 2, true, VCI_FLOAT8_TRANSTYPEBYVAL, false, false, aggref_simple_var_timestamp_larger, aggref_eval_expr_timestamp_larger}, /* 2036 */
	{F_BOOLAND_STATEFUNC, 2, true, true, false, false, aggref_simple_var_booland_statefunc, aggref_eval_expr_booland_statefunc}, /* 2515 */
	{F_BOOLOR_STATEFUNC, 2, true, true, false, false, aggref_simple_var_boolor_statefunc, aggref_eval_expr_boolor_statefunc}, /* 2516 */
	{F_INT8INC_ANY, 2, true, VCI_FLOAT8_TRANSTYPEBYVAL, false, false, aggref_simple_var_int8inc_any, aggref_eval_expr_int8inc_any}, /* 2804 */
};

/**
 * Returns routine that individually inlines transition function for aggregate function
 *
 * When PostgreSQL performs avg aggregation on float4, the transition function that adds each input data (of float4 type)
 * is float4_accum(). This function uses information from VciAggStatePerAgg to identify the transition function and
 * returns a pointer to a fast routine that inlines the transition function, if any.
 *
 * @param[in] peraggstate Pointer to AggrefState information
 * @return Returns pointer to transition routine for the aggregate function.
If not supported, returns NULL + */ +VciAdvanceAggref_Func +VciGetSpecialAdvanceAggrefFunc(VciAggStatePerAgg peraggstate) +{ + VciProjectionInfo *projInfo = peraggstate->evalproj; + + if (peraggstate->aggref->aggfilter != NULL || peraggstate->numSortCols > 0) + return NULL; + + if ((peraggstate->numTransInputs == 0) || + (peraggstate->numTransInputs == 1 && + projInfo->pi_numSimpleVars == 1 && projInfo->pi_directMap && projInfo->pi_tle_array_len == 0)) + { + AggrefTransInfo *trans_info_p = search_aggref_trans_info(peraggstate->transfn_oid); + + if (trans_info_p) + { + if (trans_info_p->simple_var_func) + { + if (peraggstate->transfn.fn_nargs != trans_info_p->fn_nargs) + elog(ERROR, "Oid %d fn_nargs = %d, trans_info.fn_nargs = %d", + peraggstate->transfn_oid, peraggstate->transfn.fn_nargs, trans_info_p->fn_nargs); + + if (peraggstate->transfn.fn_strict != trans_info_p->fn_strict) + elog(ERROR, "Oid %d peraggstate fn_strict = %d, trans_info.fn_strict = %d", + peraggstate->transfn_oid, peraggstate->transfn.fn_strict, trans_info_p->fn_strict); + + if (peraggstate->transtypeByVal != trans_info_p->transtypeByVal) + elog(ERROR, "Oid %d transtypeByVal peraggstate = %d, trans_info = %d", + peraggstate->transfn_oid, peraggstate->transtypeByVal, trans_info_p->transtypeByVal); + + return trans_info_p->simple_var_func; + } + } + + if (peraggstate->numTransInputs == 0) + return aggref_0input_default; + else + return aggref_simple_var_default; + } + else if (peraggstate->numTransInputs == 1 && + projInfo->pi_numSimpleVars == 0 && projInfo->pi_tle_array_len == 1) + { + AggrefTransInfo *trans_info_p = search_aggref_trans_info(peraggstate->transfn_oid); + + if (trans_info_p) + { + if (trans_info_p->eval_expr_func) + { + if (peraggstate->transfn.fn_nargs != trans_info_p->fn_nargs) + elog(ERROR, "Oid %d fn_nargs = %d, trans_info.fn_nargs = %d", + peraggstate->transfn_oid, peraggstate->transfn.fn_nargs, trans_info_p->fn_nargs); + + if (peraggstate->transfn.fn_strict != 
trans_info_p->fn_strict) + elog(ERROR, "Oid %d peraggstate fn_strict = %d, trans_info.fn_strict = %d", + peraggstate->transfn_oid, peraggstate->transfn.fn_strict, trans_info_p->fn_strict); + + if (peraggstate->transtypeByVal != trans_info_p->transtypeByVal) + elog(ERROR, "Oid %d transtypeByVal peraggstate = %d, trans_info = %d", + peraggstate->transfn_oid, peraggstate->transtypeByVal, trans_info_p->transtypeByVal); + + return trans_info_p->eval_expr_func; + } + } + + return aggref_eval_expr_default; + } + + return NULL; +} + +static AggrefTransInfo * +search_aggref_trans_info(Oid oid) +{ + AggrefTransInfo key = {0}; + AggrefTransInfo *res; + + key.fn_oid = oid; + + res = (AggrefTransInfo *) bsearch(&key, function_table, + lengthof(function_table), sizeof(function_table[0]), + compare_aggref_trans_info); + + return res; +} + +static int +compare_aggref_trans_info(const void *p1, const void *p2) +{ + const AggrefTransInfo *info1 = (const AggrefTransInfo *) p1; + const AggrefTransInfo *info2 = (const AggrefTransInfo *) p2; + + if (info1->fn_oid > info2->fn_oid) + return +1; + else if (info1->fn_oid < info2->fn_oid) + return -1; + else + return 0; +} diff --git a/contrib/vci/executor/vci_executor.c b/contrib/vci/executor/vci_executor.c new file mode 100644 index 0000000..5da2a70 --- /dev/null +++ b/contrib/vci/executor/vci_executor.c @@ -0,0 +1,2116 @@ +/*------------------------------------------------------------------------- + * + * vci_executor.c + * Miscellaneous executor utility routines + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/executor/vci_executor.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/nbtree.h" +#include "access/relscan.h" +#include "access/transam.h" +#include "access/tupconvert.h" +#include "access/xact.h" /* for XactEvent */ +#include "catalog/index.h" +#include 
"catalog/objectaccess.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
#include "commands/explain.h"
#include "commands/typecmds.h"
#include "executor/execdebug.h"
#include "executor/execExpr.h"
#include "executor/executor.h"
#include "executor/nodeCustom.h"
#include "executor/nodeSubplan.h"
#include "fmgr.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "nodes/execnodes.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/nodes.h"
#include "optimizer/planner.h"
#include "parser/parse_coerce.h"
#include "parser/parsetree.h"
#include "pgstat.h"
#include "storage/lmgr.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/jsonfuncs.h"
#include "utils/jsonpath.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/typcache.h"
#include "utils/xml.h"

#include "vci.h"

#include "vci_executor.h"
#include "vci_utils.h"

/**
 * Record QueryDesc executing VCI on Executor
 *
 * - NULL on no execution, and records QueryDesc when executing VCI on ExecutorStart hook
 * - Only 1 VCI running query runs at a time (VCI does not run multiple queries in parallel)
 * - Return to NULL when VCI ends at ExecutorEnd
 * - In case of transaction error, force return to NULL using vci_xact_callback callback
 * - In case of error in subtransaction, determine if it is applicable using SubTransactionId
 *
 * @note There are patterns in which Executor is recursively called, such as when stored procedure is called
 * @note When FETCH-ing a DECLARE CURSOR, multiple Executor of queries are called in parallel.
 */
static QueryDesc *vci_execution_query_desc = NULL;

/* subtransaction in which the VCI query above was started */
static SubTransactionId vci_execution_subid = InvalidSubTransactionId;

/**
 * Record the first call of vci_executor_run_routine() of VCI execution
 *
 * - When using cursor, ExecutorRun() may be called multiple times for a query,
 *   but this is used to limit the setup process to the first time.
 * - Even if stored procedure calls executor at multiple stages, it does not change
 *   at stages unrelated to VCI execution.
 */
static bool vci_executor_run_routine_once = false;

/* static function decls */
static bool should_fetch_column_store(Var *var, PlanState *parent);

static void vci_executor_start_routine(QueryDesc *queryDesc, int eflags);
static void vci_executor_run_routine(QueryDesc *queryDesc, ScanDirection direction, uint64 count);
static void vci_executor_end_routine(QueryDesc *queryDesc);
static void vci_explain_one_query_routine(Query *queryDesc, int cursorOptions, IntoClause *into,
										  ExplainState *es, const char *queryString, ParamListInfo params,
										  QueryEnvironment *queryEnv);

/* Static variables: previous values of the hooks we install over */
static ExecutorStart_hook_type executor_start_prev;
static ExecutorRun_hook_type executor_run_prev;
static ExecutorEnd_hook_type executor_end_prev;
static ExplainOneQuery_hook_type explain_one_query_prev;

static void VciExecInitExprRec(Expr *node, PlanState *parent, ExprState *state, Datum *resv, bool *resnull, vci_initexpr_t inittype);
static void VciExecInitFunc(ExprEvalStep *scratch, Expr *node, List *args, Oid funcid, Oid inputcollid, PlanState *parent, ExprState *state, vci_initexpr_t inittype);
static void VciExecInitJsonExpr(JsonExpr *jsexpr, PlanState *parent, ExprState *state,
								Datum *resv, bool *resnull,
								ExprEvalStep *scratch, vci_initexpr_t inittype);
static void VciExecInitJsonCoercion(ExprState *state, JsonReturning *returning,
									ErrorSaveContext *escontext, bool omit_quotes,
									bool exists_coerce,
									Datum *resv, bool *resnull);

/**
 * Registration of VCI's executor routine
 *
 * Saves the previously installed executor/explain hooks so our callbacks can
 * chain to them, then installs the VCI callbacks.
 */
void
vci_setup_executor_hook(void)
{
	executor_start_prev = ExecutorStart_hook;
	ExecutorStart_hook = vci_executor_start_routine;

	executor_run_prev = ExecutorRun_hook;
	ExecutorRun_hook = vci_executor_run_routine;

	executor_end_prev = ExecutorEnd_hook;
	ExecutorEnd_hook = vci_executor_end_routine;
	explain_one_query_prev = ExplainOneQuery_hook;
	ExplainOneQuery_hook = vci_explain_one_query_routine;

	ExprEvalVar_hook = VciExecEvalScalarVarFromColumnStore;
	ExprEvalParam_hook = VciExecEvalParamExec;

}

/**
 * ExecutorStart hook callback
 *
 * If no other VCI query is in progress, tries to rewrite the plan; when the
 * rewrite yields a VCI custom plan, records this QueryDesc (and the current
 * subtransaction) as the single running VCI query.  Parallel workers skip
 * the VCI setup entirely.  Always chains to the previous hook or to
 * standard_ExecutorStart().
 */
static void
vci_executor_start_routine(QueryDesc *queryDesc, int eflags)
{
	SubTransactionId mySubid;

	if (IsParallelWorker())
		goto end;

	mySubid = GetCurrentSubTransactionId();

	if (vci_execution_query_desc == NULL)
	{
		/* Start plan rewrite only if no other Executor is running */
		vci_initialize_query_context(queryDesc, eflags);

		if (vci_is_processing_custom_plan())
		{
			vci_execution_query_desc = queryDesc;
			vci_execution_subid = mySubid;
			vci_executor_run_routine_once = false;
		}
	}

end:
	if (executor_start_prev)
		executor_start_prev(queryDesc, eflags);
	else
		standard_ExecutorStart(queryDesc, eflags);
}

/**
 * ExecutorRun hook callback
 *
 * Currently only chains to the previous hook / standard_ExecutorRun(); the
 * parallel-worker branch jumps straight to that tail, so the guard is a
 * no-op placeholder for non-worker-only work.
 */
static void
vci_executor_run_routine(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
{
	if (IsParallelWorker())
		goto end;

end:
	if (executor_run_prev)
		executor_run_prev(queryDesc, direction, count);
	else
		standard_ExecutorRun(queryDesc, direction, count);
}

/**
 * ExecutorEnd hook callback
 *
 * Chains to the previous hook / standard_ExecutorEnd() first, then, if this
 * QueryDesc is the recorded VCI query, tears down the VCI query context and
 * clears the module-level execution state.
 */
static void
vci_executor_end_routine(QueryDesc *queryDesc)
{
	if (executor_end_prev)
		executor_end_prev(queryDesc);
	else
		standard_ExecutorEnd(queryDesc);

	if (IsParallelWorker())
		return;

	if (vci_execution_query_desc == queryDesc)
	{
		vci_finalize_query_context();

		/*
		 * vci_free_query_context call is moved inside
		 * vci_finalize_query_context , otherwise this call will not delete
		 * SMC created for parallelism
		 */
		/* vci_free_query_context(); */

		vci_execution_query_desc = NULL;
		vci_execution_subid = InvalidSubTransactionId;
		vci_executor_run_routine_once = false;
	}
}

/**
 * ExplainOneQuery hook callback: delegate to the previous hook if any,
 * otherwise to the standard implementation.
 */
static void
vci_explain_one_query_routine(Query *queryDesc, int cursorOptions, IntoClause *into,
							  ExplainState *es, const char *queryString, ParamListInfo params,
							  QueryEnvironment *queryEnv)
{
	if (explain_one_query_prev)
		explain_one_query_prev(queryDesc, cursorOptions, into, es, queryString, params, queryEnv);
	else
	{
		/*
		 * copy from ExplainOneQuery() in src/backend/commands/explain.c
		 */
		standard_ExplainOneQuery(queryDesc, cursorOptions, into, es,
								 queryString, params, queryEnv);
	}
}

/**
 * Stop VCI execute at transaction switch time
 *
 * On XACT_EVENT_ABORT, frees the VCI query context (if one is live) and
 * resets the module-level execution state; on (pre-)commit, asserts that no
 * VCI query is still registered.
 */
void
vci_xact_change_handler(XactEvent event)
{
	switch (event)
	{
		case XACT_EVENT_ABORT:
			if (vci_execution_query_desc != NULL)
			{
				elog(DEBUG1, "vci:executor caught any exception");
				vci_free_query_context();
			}
			vci_execution_query_desc = NULL;
			vci_execution_subid = InvalidSubTransactionId;
			vci_executor_run_routine_once = false;
			break;

		case XACT_EVENT_PRE_COMMIT:
		case XACT_EVENT_COMMIT:
			Assert(vci_execution_query_desc == NULL);
			break;

		default:
			/**
			 * XACT_EVENT_PREPARE
			 * XACT_EVENT_PRE_PREPARE
			 */
			break;
	}
}

/**
 * Event Handler on subxact change.
 */
void
vci_subxact_change_handler(SubXactEvent event, SubTransactionId mySubid)
{
	switch (event)
	{
		case SUBXACT_EVENT_START_SUB:
			break;

		case SUBXACT_EVENT_ABORT_SUB:
			/*
			 * Only react if the aborting subtransaction is the one in which
			 * the current VCI query was started; then free its context and
			 * clear the module-level execution state.
			 */
			if (mySubid == vci_execution_subid)
			{
				elog(DEBUG1, "vci:executor caught any exception in sub transaction");
				vci_free_query_context();

				vci_execution_query_desc = NULL;
				vci_execution_subid = InvalidSubTransactionId;
				vci_executor_run_routine_once = false;
			}
			break;

		case SUBXACT_EVENT_PRE_COMMIT_SUB:
		case SUBXACT_EVENT_COMMIT_SUB:
			break;
	}
}

/**
 * Determine whether Var fetches from column store
 *
 * Returns true only when the owning plan node is a VCI CustomScanState whose
 * flags identify it as a VCI scan, sort, or agg node.
 */
static bool
should_fetch_column_store(Var *var, PlanState *planstate)
{
	Assert(var != NULL);
	Assert(planstate != NULL);
	Assert(nodeTag(planstate) != T_Invalid);

	if (IsA(planstate, CustomScanState))
	{
		CustomScanState *cps = (CustomScanState *) planstate;
		uint32		plan_type = cps->flags & VCI_CUSTOMPLAN_MASK;

		if ((plan_type == VCI_CUSTOMPLAN_SCAN) ||
			(plan_type == VCI_CUSTOMPLAN_SORT) ||
			(plan_type == VCI_CUSTOMPLAN_AGG))
		{
			return true;
		}
	}

	return false;
}

/* ----------------------------------------------------------------
 *		ExecEvalOper / ExecEvalFunc support routines
 * ----------------------------------------------------------------
 */

/*
 * VciExecInitExprRec
 *		Append the steps necessary for the evaluation of node to ExprState->steps,
 *		possibly recursing into sub-expressions of node.
+ * + * node - expression to evaluate + * parent - parent executor node (or NULL if a standalone expression) + * state - ExprState to whose ->steps to append the necessary operations + * resv / resnull - where to store the result of the node into + * copied from src/backend/executor/execExpr.c + */ +static void +VciExecInitExprRec(Expr *node, PlanState *parent, ExprState *state, + Datum *resv, bool *resnull, vci_initexpr_t inittype) +{ + ExprEvalStep scratch = {0}; + + /* Guard against stack overflow due to overly complex expressions */ + check_stack_depth(); + + /* Step's output location is always what the caller gave us */ + Assert(resv != NULL && resnull != NULL); + scratch.resvalue = resv; + scratch.resnull = resnull; + + switch (nodeTag(node)) + { + case T_Var: + { + Var *variable = (Var *) node; + + Assert(((Var *) node)->varattno != InvalidAttrNumber); + + if ((inittype == VCI_INIT_EXPR_FETCHING_COLUMN_STORE) && + should_fetch_column_store((Var *) node, parent)) + { + /* + * CustomScanState *cstate; VciScanState *vci_scanstate; + * cstate = (CustomScanState *) parent; + * + * Assert(IsA(cstate, CustomScanState)); vci_scanstate = + * vci_search_scan_state((VciPlanState *) parent); + * scratch.opcode = EEOP_VCI_VAR; scratch.d.vci_scanstate + * = vci_scanstate; + * + * This is to make use of OSS structure ExprEvalStep + * rathen then copying it in VCI again for additional + * information on var and param nodes. 
Searching for + * underlying scan state is postponed to + * vciExecEvalScalarVarFromColumnStore() + */ + scratch.opcode = EEOP_VCI_VAR; + scratch.d.var.vci_parent_planstate = parent; + + } + else if (variable->varattno <= 0) + { + /* system column */ + scratch.d.var.attnum = variable->varattno; + scratch.d.var.vartype = variable->vartype; + scratch.d.var.varreturningtype = variable->varreturningtype; + switch (variable->varno) + { + case INNER_VAR: + scratch.opcode = EEOP_INNER_SYSVAR; + break; + case OUTER_VAR: + scratch.opcode = EEOP_OUTER_SYSVAR; + break; + + /* INDEX_VAR is handled by default case */ + + default: + switch (variable->varreturningtype) + { + case VAR_RETURNING_DEFAULT: + scratch.opcode = EEOP_SCAN_SYSVAR; + break; + case VAR_RETURNING_OLD: + scratch.opcode = EEOP_OLD_SYSVAR; + state->flags |= EEO_FLAG_HAS_OLD; + break; + case VAR_RETURNING_NEW: + scratch.opcode = EEOP_NEW_SYSVAR; + state->flags |= EEO_FLAG_HAS_NEW; + break; + } + break; + } + } + else + { + /* regular user column */ + scratch.d.var.attnum = variable->varattno - 1; + scratch.d.var.vartype = variable->vartype; + scratch.d.var.varreturningtype = variable->varreturningtype; + /* select EEOP_*_FIRST opcode to force one-time checks */ + switch (variable->varno) + { + case INNER_VAR: + scratch.opcode = EEOP_INNER_VAR; + break; + case OUTER_VAR: + scratch.opcode = EEOP_OUTER_VAR; + break; + + /* INDEX_VAR is handled by default case */ + + default: + switch (variable->varreturningtype) + { + case VAR_RETURNING_DEFAULT: + scratch.opcode = EEOP_SCAN_VAR; + break; + case VAR_RETURNING_OLD: + scratch.opcode = EEOP_OLD_VAR; + state->flags |= EEO_FLAG_HAS_OLD; + break; + case VAR_RETURNING_NEW: + scratch.opcode = EEOP_NEW_VAR; + state->flags |= EEO_FLAG_HAS_NEW; + break; + } + break; + } + } + + ExprEvalPushStep(state, &scratch); + break; + } + case T_Const: + { + Const *con = (Const *) node; + + scratch.opcode = EEOP_CONST; + scratch.d.constval.value = con->constvalue; + 
scratch.d.constval.isnull = con->constisnull; + + ExprEvalPushStep(state, &scratch); + break; + } + case T_Param: + { + Param *param = (Param *) node; + + Assert(param->paramkind == PARAM_EXEC); + scratch.d.param.vci_parent_plan = parent->plan; + scratch.opcode = EEOP_VCI_PARAM_EXEC; + scratch.d.param.paramid = param->paramid; + scratch.d.param.paramtype = param->paramtype; + + ExprEvalPushStep(state, &scratch); + break; + } + case T_CaseTestExpr: + + /* + * Read from location identified by innermost_caseval. Note that + * innermost_caseval could be NULL, if this node isn't actually + * within a CASE structure; some parts of the system abuse + * CaseTestExpr to cause a read of a value externally supplied in + * econtext->caseValue_datum. We'll take care of that by + * generating a specialized operation. + */ + if (state->innermost_caseval == NULL) + scratch.opcode = EEOP_CASE_TESTVAL_EXT; + else + { + scratch.opcode = EEOP_CASE_TESTVAL; + scratch.d.casetest.value = state->innermost_caseval; + scratch.d.casetest.isnull = state->innermost_casenull; + } + ExprEvalPushStep(state, &scratch); + break; + + case T_Aggref: + { + Aggref *aggref = (Aggref *) node; + + scratch.opcode = EEOP_AGGREF; + scratch.d.aggref.aggno = aggref->aggno; + + if (parent && IsA(parent, CustomScanState)) + { + VciAggState *aggstate = (VciAggState *) parent; + + aggstate->aggs = lappend(aggstate->aggs, aggref); + } + else + { + /* planner messed up */ + elog(ERROR, "Aggref found in non-Agg plan node"); + } + + ExprEvalPushStep(state, &scratch); + break; + } + break; + + case T_MergeSupportFunc: + { + /* must be in a MERGE, else something messed up */ + if (!state->parent || + !IsA(state->parent, ModifyTableState) || + ((ModifyTableState *) state->parent)->operation != CMD_MERGE) + elog(ERROR, "MergeSupportFunc found in non-merge plan node"); + scratch.opcode = EEOP_MERGE_SUPPORT_FUNC; + ExprEvalPushStep(state, &scratch); + break; + } + + case T_FuncExpr: + { + FuncExpr *func = (FuncExpr *) node; 
+ + VciExecInitFunc(&scratch, node, + func->args, func->funcid, func->inputcollid, + parent, state, inittype); + ExprEvalPushStep(state, &scratch); + break; + } + break; + case T_OpExpr: + { + OpExpr *op = (OpExpr *) node; + + VciExecInitFunc(&scratch, node, + op->args, op->opfuncid, op->inputcollid, + parent, state, inittype); + ExprEvalPushStep(state, &scratch); + break; + } + break; + case T_DistinctExpr: + { + DistinctExpr *op = (DistinctExpr *) node; + + VciExecInitFunc(&scratch, node, + op->args, op->opfuncid, op->inputcollid, + parent, state, inittype); + + /* + * Change opcode of call instruction to EEOP_DISTINCT. + * + * XXX: historically we've not called the function usage + * pgstat infrastructure - that seems inconsistent given that + * we do so for normal function *and* operator evaluation. If + * we decided to do that here, we'd probably want separate + * opcodes for FUSAGE or not. + */ + scratch.opcode = EEOP_DISTINCT; + ExprEvalPushStep(state, &scratch); + break; + } + break; + case T_NullIfExpr: + { + NullIfExpr *op = (NullIfExpr *) node; + + VciExecInitFunc(&scratch, node, + op->args, op->opfuncid, op->inputcollid, + parent, state, inittype); + + /* + * If first argument is of varlena type, we'll need to ensure + * that the value passed to the comparison function is a + * read-only pointer. + */ + scratch.d.func.make_ro = + (get_typlen(exprType((Node *) linitial(op->args))) == -1); + + /* + * Change opcode of call instruction to EEOP_NULLIF. + * + * XXX: historically we've not called the function usage + * pgstat infrastructure - that seems inconsistent given that + * we do so for normal function *and* operator evaluation. If + * we decided to do that here, we'd probably want separate + * opcodes for FUSAGE or not. 
+ */ + scratch.opcode = EEOP_NULLIF; + ExprEvalPushStep(state, &scratch); + break; + } + break; + case T_ScalarArrayOpExpr: + { + ScalarArrayOpExpr *opexpr = (ScalarArrayOpExpr *) node; + Expr *scalararg; + Expr *arrayarg; + FmgrInfo *finfo; + FunctionCallInfo fcinfo; + AclResult aclresult; + + Assert(list_length(opexpr->args) == 2); + scalararg = (Expr *) linitial(opexpr->args); + arrayarg = (Expr *) lsecond(opexpr->args); + + /* Check permission to call function */ + aclresult = object_aclcheck(ProcedureRelationId, opexpr->opfuncid, + GetUserId(), + ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FUNCTION, + get_func_name(opexpr->opfuncid)); + InvokeFunctionExecuteHook(opexpr->opfuncid); + + if (OidIsValid(opexpr->hashfuncid)) + { + aclresult = object_aclcheck(ProcedureRelationId, opexpr->hashfuncid, + GetUserId(), + ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FUNCTION, + get_func_name(opexpr->hashfuncid)); + InvokeFunctionExecuteHook(opexpr->hashfuncid); + } + + /* Set up the primary fmgr lookup information */ + finfo = palloc0_object(FmgrInfo); + fcinfo = palloc0(SizeForFunctionCallInfo(2)); + fmgr_info(opexpr->opfuncid, finfo); + fmgr_info_set_expr((Node *) node, finfo); + InitFunctionCallInfoData(*fcinfo, finfo, 2, + opexpr->inputcollid, NULL, NULL); + + /* + * If hashfuncid is set, we create a EEOP_HASHED_SCALARARRAYOP + * step instead of a EEOP_SCALARARRAYOP. This provides much + * faster lookup performance than the normal linear search + * when the number of items in the array is anything but very + * small. + */ + if (OidIsValid(opexpr->hashfuncid)) + { + + /* Evaluate scalar directly into left function argument */ + VciExecInitExprRec(scalararg, parent, state, + &fcinfo->args[0].value, &fcinfo->args[0].isnull, inittype); + + /* + * Evaluate array argument into our return value. 
There's + * no danger in that, because the return value is + * guaranteed to be overwritten by + * EEOP_HASHED_SCALARARRAYOP, and will not be passed to + * any other expression. + */ + VciExecInitExprRec(arrayarg, parent, state, resv, resnull, inittype); + + /* And perform the operation */ + scratch.opcode = EEOP_HASHED_SCALARARRAYOP; + scratch.d.hashedscalararrayop.finfo = finfo; + scratch.d.hashedscalararrayop.fcinfo_data = fcinfo; + scratch.d.hashedscalararrayop.saop = opexpr; + + ExprEvalPushStep(state, &scratch); + } + else + { + /* Evaluate scalar directly into left function argument */ + VciExecInitExprRec(scalararg, parent, state, + &fcinfo->args[0].value, &fcinfo->args[0].isnull, inittype); + + /* + * Evaluate array argument into our return value. There's + * no danger in that, because the return value is + * guaranteed to be overwritten by EEOP_SCALARARRAYOP, and + * will not be passed to any other expression. + */ + VciExecInitExprRec(arrayarg, parent, state, resv, resnull, inittype); + + /* And perform the operation */ + scratch.opcode = EEOP_SCALARARRAYOP; + scratch.d.scalararrayop.element_type = InvalidOid; + scratch.d.scalararrayop.useOr = opexpr->useOr; + scratch.d.scalararrayop.finfo = finfo; + scratch.d.scalararrayop.fcinfo_data = fcinfo; + scratch.d.scalararrayop.fn_addr = finfo->fn_addr; + ExprEvalPushStep(state, &scratch); + } + break; + } + break; + case T_BoolExpr: + { + BoolExpr *boolexpr = (BoolExpr *) node; + int nargs = list_length(boolexpr->args); + List *adjust_jumps = NIL; + int off; + ListCell *lc; + + /* allocate scratch memory used by all steps of AND/OR */ + if (boolexpr->boolop != NOT_EXPR) + scratch.d.boolexpr.anynull = palloc_object(bool); + + /* + * For each argument evaluate the argument itself, then + * perform the bool operation's appropriate handling. + * + * We can evaluate each argument into our result area, since + * the short-circuiting logic means we only need to remember + * previous NULL values. 
+ * + * AND/OR is split into separate STEP_FIRST (one) / STEP (zero + * or more) / STEP_LAST (one) steps, as each of those has to + * perform different work. The FIRST/LAST split is valid + * because AND/OR have at least two arguments. + */ + off = 0; + foreach(lc, boolexpr->args) + { + Expr *arg = (Expr *) lfirst(lc); + + /* Evaluate argument into our output variable */ + VciExecInitExprRec(arg, parent, state, resv, resnull, inittype); + + /* Perform the appropriate step type */ + switch (boolexpr->boolop) + { + case AND_EXPR: + Assert(nargs >= 2); + + if (off == 0) + scratch.opcode = EEOP_BOOL_AND_STEP_FIRST; + else if (off + 1 == nargs) + scratch.opcode = EEOP_BOOL_AND_STEP_LAST; + else + scratch.opcode = EEOP_BOOL_AND_STEP; + break; + case OR_EXPR: + Assert(nargs >= 2); + + if (off == 0) + scratch.opcode = EEOP_BOOL_OR_STEP_FIRST; + else if (off + 1 == nargs) + scratch.opcode = EEOP_BOOL_OR_STEP_LAST; + else + scratch.opcode = EEOP_BOOL_OR_STEP; + break; + case NOT_EXPR: + Assert(nargs == 1); + + scratch.opcode = EEOP_BOOL_NOT_STEP; + break; + default: + elog(ERROR, "unrecognized boolop: %d", + (int) boolexpr->boolop); + break; + } + + scratch.d.boolexpr.jumpdone = -1; + ExprEvalPushStep(state, &scratch); + adjust_jumps = lappend_int(adjust_jumps, + state->steps_len - 1); + off++; + } + + /* adjust jump targets */ + foreach(lc, adjust_jumps) + { + ExprEvalStep *as = &state->steps[lfirst_int(lc)]; + + Assert(as->d.boolexpr.jumpdone == -1); + as->d.boolexpr.jumpdone = state->steps_len; + } + } + break; + case T_RelabelType: + { + /* relabel doesn't need to do anything at runtime */ + RelabelType *relabel = (RelabelType *) node; + + VciExecInitExprRec(relabel->arg, parent, state, resv, resnull, inittype); + break; + } + break; + case T_CaseExpr: + { + CaseExpr *caseExpr = (CaseExpr *) node; + List *adjust_jumps = NIL; + Datum *caseval = NULL; + bool *casenull = NULL; + ListCell *lc; + + /* + * If there's a test expression, we have to evaluate it and + * save the 
value where the CaseTestExpr placeholders can find + * it. + */ + if (caseExpr->arg != NULL) + { + /* Evaluate testexpr into caseval/casenull workspace */ + caseval = palloc_object(Datum); + casenull = palloc_object(bool); + + VciExecInitExprRec(caseExpr->arg, parent, state, + caseval, casenull, inittype); + + /* + * Since value might be read multiple times, force to R/O + * - but only if it could be an expanded datum. + */ + if (get_typlen(exprType((Node *) caseExpr->arg)) == -1) + { + /* change caseval in-place */ + scratch.opcode = EEOP_MAKE_READONLY; + scratch.resvalue = caseval; + scratch.resnull = casenull; + scratch.d.make_readonly.value = caseval; + scratch.d.make_readonly.isnull = casenull; + ExprEvalPushStep(state, &scratch); + /* restore normal settings of scratch fields */ + scratch.resvalue = resv; + scratch.resnull = resnull; + } + } + + /* + * Prepare to evaluate each of the WHEN clauses in turn; as + * soon as one is true we return the value of the + * corresponding THEN clause. If none are true then we return + * the value of the ELSE clause, or NULL if there is none. + */ + foreach(lc, caseExpr->args) + { + CaseWhen *when = (CaseWhen *) lfirst(lc); + Datum *save_innermost_caseval; + bool *save_innermost_casenull; + int whenstep; + + /* + * Make testexpr result available to CaseTestExpr nodes + * within the condition. We must save and restore prior + * setting of innermost_caseval fields, in case this node + * is itself within a larger CASE. + * + * If there's no test expression, we don't actually need + * to save and restore these fields; but it's less code to + * just do so unconditionally. 
+ */ + save_innermost_caseval = state->innermost_caseval; + save_innermost_casenull = state->innermost_casenull; + state->innermost_caseval = caseval; + state->innermost_casenull = casenull; + + /* evaluate condition into CASE's result variables */ + VciExecInitExprRec(when->expr, parent, state, resv, resnull, inittype); + + state->innermost_caseval = save_innermost_caseval; + state->innermost_casenull = save_innermost_casenull; + + /* If WHEN result isn't true, jump to next CASE arm */ + scratch.opcode = EEOP_JUMP_IF_NOT_TRUE; + scratch.d.jump.jumpdone = -1; /* computed later */ + ExprEvalPushStep(state, &scratch); + whenstep = state->steps_len - 1; + + /* + * If WHEN result is true, evaluate THEN result, storing + * it into the CASE's result variables. + */ + VciExecInitExprRec(when->result, parent, state, resv, resnull, inittype); + + /* Emit JUMP step to jump to end of CASE's code */ + scratch.opcode = EEOP_JUMP; + scratch.d.jump.jumpdone = -1; /* computed later */ + ExprEvalPushStep(state, &scratch); + + /* + * Don't know address for that jump yet, compute once the + * whole CASE expression is built. + */ + adjust_jumps = lappend_int(adjust_jumps, + state->steps_len - 1); + + /* + * But we can set WHEN test's jump target now, to make it + * jump to the next WHEN subexpression or the ELSE. 
+ */ + state->steps[whenstep].d.jump.jumpdone = state->steps_len; + } + + /* transformCaseExpr always adds a default */ + Assert(caseExpr->defresult); + + /* evaluate ELSE expr into CASE's result variables */ + VciExecInitExprRec(caseExpr->defresult, parent, state, + resv, resnull, inittype); + + /* adjust jump targets */ + foreach(lc, adjust_jumps) + { + ExprEvalStep *as = &state->steps[lfirst_int(lc)]; + + Assert(as->opcode == EEOP_JUMP); + Assert(as->d.jump.jumpdone == -1); + as->d.jump.jumpdone = state->steps_len; + } + } + break; + case T_CoalesceExpr: + { + CoalesceExpr *coalesce = (CoalesceExpr *) node; + List *adjust_jumps = NIL; + ListCell *lc; + + /* We assume there's at least one arg */ + Assert(coalesce->args != NIL); + + /* + * Prepare evaluation of all coalesced arguments, after each + * one push a step that short-circuits if not null. + */ + foreach(lc, coalesce->args) + { + Expr *e = (Expr *) lfirst(lc); + + /* evaluate argument, directly into result datum */ + VciExecInitExprRec(e, parent, state, resv, resnull, inittype); + + /* if it's not null, skip to end of COALESCE expr */ + scratch.opcode = EEOP_JUMP_IF_NOT_NULL; + scratch.d.jump.jumpdone = -1; /* adjust later */ + ExprEvalPushStep(state, &scratch); + + adjust_jumps = lappend_int(adjust_jumps, + state->steps_len - 1); + } + + /* + * No need to add a constant NULL return - we only can get to + * the end of the expression if a NULL already is being + * returned. 
+ */ + + /* adjust jump targets */ + foreach(lc, adjust_jumps) + { + ExprEvalStep *as = &state->steps[lfirst_int(lc)]; + + Assert(as->opcode == EEOP_JUMP_IF_NOT_NULL); + Assert(as->d.jump.jumpdone == -1); + as->d.jump.jumpdone = state->steps_len; + } + } + break; + case T_MinMaxExpr: + { + MinMaxExpr *minmaxexpr = (MinMaxExpr *) node; + int nelems = list_length(minmaxexpr->args); + TypeCacheEntry *typentry; + FmgrInfo *finfo; + FunctionCallInfo fcinfo; + ListCell *lc; + int off; + + /* Look up the btree comparison function for the datatype */ + typentry = lookup_type_cache(minmaxexpr->minmaxtype, + TYPECACHE_CMP_PROC); + if (!OidIsValid(typentry->cmp_proc)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify a comparison function for type %s", + format_type_be(minmaxexpr->minmaxtype)))); + + /* + * If we enforced permissions checks on index support + * functions, we'd need to make a check here. But the index + * support machinery doesn't do that, and thus neither does + * this code. 
+ */ + + /* Perform function lookup */ + finfo = palloc0_object(FmgrInfo); + fcinfo = palloc0(SizeForFunctionCallInfo(2)); + fmgr_info(typentry->cmp_proc, finfo); + fmgr_info_set_expr((Node *) node, finfo); + InitFunctionCallInfoData(*fcinfo, finfo, 2, + minmaxexpr->inputcollid, NULL, NULL); + + scratch.opcode = EEOP_MINMAX; + /* allocate space to store arguments */ + scratch.d.minmax.values = + palloc_array(Datum, nelems); + scratch.d.minmax.nulls = + palloc_array(bool, nelems); + scratch.d.minmax.nelems = nelems; + + scratch.d.minmax.op = minmaxexpr->op; + scratch.d.minmax.finfo = finfo; + scratch.d.minmax.fcinfo_data = fcinfo; + + /* evaluate expressions into minmax->values/nulls */ + off = 0; + foreach(lc, minmaxexpr->args) + { + Expr *e = (Expr *) lfirst(lc); + + VciExecInitExprRec(e, parent, state, + &scratch.d.minmax.values[off], + &scratch.d.minmax.nulls[off], inittype); + off++; + } + + /* and push the final comparison */ + ExprEvalPushStep(state, &scratch); + break; + } + + case T_SQLValueFunction: + { + SQLValueFunction *svf = (SQLValueFunction *) node; + + scratch.opcode = EEOP_SQLVALUEFUNCTION; + scratch.d.sqlvaluefunction.svf = svf; + + ExprEvalPushStep(state, &scratch); + break; + } + + case T_JsonValueExpr: + { + JsonValueExpr *jve = (JsonValueExpr *) node; + + Assert(jve->raw_expr != NULL); + VciExecInitExprRec(jve->raw_expr, parent, state, resv, resnull, inittype); + Assert(jve->formatted_expr != NULL); + VciExecInitExprRec(jve->formatted_expr, parent, state, resv, resnull, inittype); + + break; + } + + case T_JsonConstructorExpr: + { + JsonConstructorExpr *ctor = (JsonConstructorExpr *) node; + List *args = ctor->args; + ListCell *lc; + int nargs = list_length(args); + int argno = 0; + + if (ctor->func) + { + VciExecInitExprRec(ctor->func, parent, state, resv, resnull, inittype); + } + else if ((ctor->type == JSCTOR_JSON_PARSE && !ctor->unique) || + ctor->type == JSCTOR_JSON_SERIALIZE) + { + /* Use the value of the first argument as result */ + 
VciExecInitExprRec(linitial(args), parent, state, resv, resnull, inittype); + } + else + { + JsonConstructorExprState *jcstate; + + jcstate = palloc0_object(JsonConstructorExprState); + + scratch.opcode = EEOP_JSON_CONSTRUCTOR; + scratch.d.json_constructor.jcstate = jcstate; + + jcstate->constructor = ctor; + jcstate->arg_values = palloc_array(Datum, nargs); + jcstate->arg_nulls = palloc_array(bool, nargs); + jcstate->arg_types = palloc_array(Oid, nargs); + jcstate->nargs = nargs; + + foreach(lc, args) + { + Expr *arg = (Expr *) lfirst(lc); + + jcstate->arg_types[argno] = exprType((Node *) arg); + + if (IsA(arg, Const)) + { + /* Don't evaluate const arguments every round */ + Const *con = (Const *) arg; + + jcstate->arg_values[argno] = con->constvalue; + jcstate->arg_nulls[argno] = con->constisnull; + } + else + { + VciExecInitExprRec(arg, parent, state, &jcstate->arg_values[argno], &jcstate->arg_nulls[argno], inittype); + } + argno++; + } + + /* prepare type cache for datum_to_json[b]() */ + if (ctor->type == JSCTOR_JSON_SCALAR) + { + bool is_jsonb = + ctor->returning->format->format_type == JS_FORMAT_JSONB; + + jcstate->arg_type_cache = + palloc(sizeof(*jcstate->arg_type_cache) * nargs); + + for (int i = 0; i < nargs; i++) + { + JsonTypeCategory category; + Oid outfuncid; + Oid typid = jcstate->arg_types[i]; + + json_categorize_type(typid, is_jsonb, + &category, &outfuncid); + + jcstate->arg_type_cache[i].outfuncid = outfuncid; + jcstate->arg_type_cache[i].category = (int) category; + } + } + + ExprEvalPushStep(state, &scratch); + } + + if (ctor->coercion) + { + Datum *innermost_caseval = state->innermost_caseval; + bool *innermost_isnull = state->innermost_casenull; + + state->innermost_caseval = resv; + state->innermost_casenull = resnull; + + VciExecInitExprRec(ctor->coercion, parent, state, resv, resnull, inittype); + + state->innermost_caseval = innermost_caseval; + state->innermost_casenull = innermost_isnull; + } + } + break; + + case T_JsonIsPredicate: + 
{ + JsonIsPredicate *pred = (JsonIsPredicate *) node; + + VciExecInitExprRec((Expr *) pred->expr, parent, state, resv, resnull, inittype); + + scratch.opcode = EEOP_IS_JSON; + scratch.d.is_json.pred = pred; + + ExprEvalPushStep(state, &scratch); + break; + } + + case T_JsonExpr: + { + JsonExpr *jsexpr = castNode(JsonExpr, node); + + /* + * No need to initialize a full JsonExprState For + * JSON_TABLE(), because the upstream caller tfuncFetchRows() + * is only interested in the value of formatted_expr. + */ + if (jsexpr->op == JSON_TABLE_OP) + VciExecInitExprRec((Expr *) jsexpr->formatted_expr, parent, state, + resv, resnull, inittype); + else + VciExecInitJsonExpr(jsexpr, parent, state, resv, resnull, &scratch, inittype); + break; + } + + case T_NullTest: + { + NullTest *ntest = (NullTest *) node; + + if (ntest->nulltesttype == IS_NULL) + { + if (ntest->argisrow) + scratch.opcode = EEOP_NULLTEST_ROWISNULL; + else + scratch.opcode = EEOP_NULLTEST_ISNULL; + } + else if (ntest->nulltesttype == IS_NOT_NULL) + { + if (ntest->argisrow) + scratch.opcode = EEOP_NULLTEST_ROWISNOTNULL; + else + scratch.opcode = EEOP_NULLTEST_ISNOTNULL; + } + else + { + elog(ERROR, "unrecognized nulltesttype: %d", + (int) ntest->nulltesttype); + } + /* initialize cache in case it's a row test */ + scratch.d.nulltest_row.rowcache.cacheptr = NULL; + + /* first evaluate argument into result variable */ + VciExecInitExprRec(ntest->arg, parent, state, + resv, resnull, inittype); + + /* then push the test of that argument */ + ExprEvalPushStep(state, &scratch); + break; + } + break; + case T_BooleanTest: + { + BooleanTest *btest = (BooleanTest *) node; + + /* + * Evaluate argument, directly into result datum. That's ok, + * because resv/resnull is definitely not used anywhere else, + * and will get overwritten by the below EEOP_BOOLTEST_IS_* + * step. 
+ */ + VciExecInitExprRec(btest->arg, parent, state, resv, resnull, inittype); + + switch (btest->booltesttype) + { + case IS_TRUE: + scratch.opcode = EEOP_BOOLTEST_IS_TRUE; + break; + case IS_NOT_TRUE: + scratch.opcode = EEOP_BOOLTEST_IS_NOT_TRUE; + break; + case IS_FALSE: + scratch.opcode = EEOP_BOOLTEST_IS_FALSE; + break; + case IS_NOT_FALSE: + scratch.opcode = EEOP_BOOLTEST_IS_NOT_FALSE; + break; + case IS_UNKNOWN: + /* Same as scalar IS NULL test */ + scratch.opcode = EEOP_NULLTEST_ISNULL; + break; + case IS_NOT_UNKNOWN: + /* Same as scalar IS NOT NULL test */ + scratch.opcode = EEOP_NULLTEST_ISNOTNULL; + break; + default: + elog(ERROR, "unrecognized booltesttype: %d", + (int) btest->booltesttype); + } + + ExprEvalPushStep(state, &scratch); + break; + } + break; + case T_CoerceViaIO: + { + CoerceViaIO *iocoerce = (CoerceViaIO *) node; + Oid iofunc; + bool typisvarlena; + Oid typioparam; + FunctionCallInfo fcinfo_in; + + /* evaluate argument into step's result area */ + VciExecInitExprRec(iocoerce->arg, parent, state, resv, resnull, inittype); + + /* + * Prepare both output and input function calls, to be + * evaluated inside a single evaluation step for speed - this + * can be a very common operation. + * + * We don't check permissions here as a type's input/output + * function are assumed to be executable by everyone. 
+ */ + if (state->escontext == NULL) + scratch.opcode = EEOP_IOCOERCE; + else + scratch.opcode = EEOP_IOCOERCE_SAFE; + + /* lookup the source type's output function */ + scratch.d.iocoerce.finfo_out = palloc0_object(FmgrInfo); + scratch.d.iocoerce.fcinfo_data_out = palloc0(SizeForFunctionCallInfo(1)); + + getTypeOutputInfo(exprType((Node *) iocoerce->arg), + &iofunc, &typisvarlena); + fmgr_info(iofunc, scratch.d.iocoerce.finfo_out); + fmgr_info_set_expr((Node *) node, scratch.d.iocoerce.finfo_out); + InitFunctionCallInfoData(*scratch.d.iocoerce.fcinfo_data_out, + scratch.d.iocoerce.finfo_out, + 1, InvalidOid, NULL, NULL); + + /* lookup the result type's input function */ + scratch.d.iocoerce.finfo_in = palloc0_object(FmgrInfo); + scratch.d.iocoerce.fcinfo_data_in = palloc0(SizeForFunctionCallInfo(3)); + + getTypeInputInfo(iocoerce->resulttype, + &iofunc, &typioparam); + fmgr_info(iofunc, scratch.d.iocoerce.finfo_in); + fmgr_info_set_expr((Node *) node, scratch.d.iocoerce.finfo_in); + InitFunctionCallInfoData(*scratch.d.iocoerce.fcinfo_data_in, + scratch.d.iocoerce.finfo_in, + 3, InvalidOid, NULL, NULL); + + /* + * We can preload the second and third arguments for the input + * function, since they're constants. 
+ */ + fcinfo_in = scratch.d.iocoerce.fcinfo_data_in; + fcinfo_in->args[1].value = ObjectIdGetDatum(typioparam); + fcinfo_in->args[1].isnull = false; + fcinfo_in->args[2].value = Int32GetDatum(-1); + fcinfo_in->args[2].isnull = false; + + fcinfo_in->context = (Node *) state->escontext; + + ExprEvalPushStep(state, &scratch); + break; + } + break; + default: + /* LCOV_EXCL_START */ + elog(ERROR, "unrecognized node type: %s(%d)", + VciGetNodeName(nodeTag(node)), (int) nodeTag(node)); + break; + /* LCOV_EXCL_STOP */ + } + +} + +/* + * VciExecInitQual: prepare a qual for execution by ExecQual + * + * Prepares for the evaluation of a conjunctive boolean expression (qual list + * with implicit AND semantics) that returns true if none of the + * subexpressions are false. + * + * We must return true if the list is empty. Since that's a very common case, + * we optimize it a bit further by translating to a NULL ExprState pointer + * rather than setting up an ExprState that computes constant TRUE. (Some + * especially hot-spot callers of ExecQual detect this and avoid calling + * ExecQual at all.) + * + * If any of the subexpressions yield NULL, then the result of the conjunction + * is false. This makes ExecQual primarily useful for evaluating WHERE + * clauses, since SQL specifies that tuples with null WHERE results do not + * get selected. 
 * copied from src/backend/executor/execExpr.c
 */
ExprState *
VciExecInitQual(List *qual, PlanState *parent, vci_initexpr_t inittype)
{
	ExprState  *state;
	ExprEvalStep scratch;
	List	   *adjust_jumps = NIL;

	/* short-circuit (here and in ExecQual) for empty restriction list */
	if (qual == NIL)
		return NULL;

	Assert(IsA(qual, List));

	state = makeNode(ExprState);
	state->expr = (Expr *) qual;
	state->parent = parent;
	state->ext_params = NULL;

	/* mark expression as to be used with ExecQual() */
	state->flags = EEO_FLAG_IS_QUAL;

	/* Insert setup steps as needed */
	ExecCreateExprSetupSteps(state, (Node *) qual);

	/*
	 * ExecQual() needs to return false for an expression returning NULL. That
	 * allows us to short-circuit the evaluation the first time a NULL is
	 * encountered.  As qual evaluation is a hot-path this warrants using a
	 * special opcode for qual evaluation that's simpler than BOOL_AND (which
	 * has more complex NULL handling).
	 */
	scratch.opcode = EEOP_QUAL;

	/*
	 * We can use ExprState's resvalue/resnull as target for each qual expr.
	 */
	scratch.resvalue = &state->resvalue;
	scratch.resnull = &state->resnull;

	foreach_ptr(Expr, node, qual)
	{

		/* first evaluate expression */
		VciExecInitExprRec(node, parent, state, &state->resvalue, &state->resnull, inittype);

		/*
		 * then emit EEOP_QUAL to detect if it's false (or null); its jump
		 * target is filled in below, once the total step count is known
		 */
		scratch.d.qualexpr.jumpdone = -1;
		ExprEvalPushStep(state, &scratch);
		adjust_jumps = lappend_int(adjust_jumps,
								   state->steps_len - 1);
	}

	/* adjust jump targets: every EEOP_QUAL short-circuits to the end */
	foreach_int(jump, adjust_jumps)
	{
		ExprEvalStep *as = &state->steps[jump];

		Assert(as->opcode == EEOP_QUAL);
		Assert(as->d.qualexpr.jumpdone == -1);
		as->d.qualexpr.jumpdone = state->steps_len;
	}

	/*
	 * At the end, we don't need to do anything more. The last qual expr must
	 * have yielded TRUE, and since its result is stored in the desired output
	 * location, we're done.
	 */
	scratch.opcode = EEOP_DONE_RETURN;
	ExprEvalPushStep(state, &scratch);

	ExecReadyExpr(state);

	return state;
}

/*
 * Perform setup necessary for the evaluation of a function-like expression,
 * appending argument evaluation steps to the steps list in *state, and
 * setting up *scratch so it is ready to be pushed.
 *
 * scratch is not pushed here, so that callers may override the opcode,
 * which is useful for function-like cases like DISTINCT.
 *
 * VCI variant of ExecInitFunc(); argument subexpressions are compiled via
 * VciExecInitExprRec() so the extra inittype is threaded through.
 */
static void
VciExecInitFunc(ExprEvalStep *scratch, Expr *node, List *args, Oid funcid,
				Oid inputcollid, PlanState *parent, ExprState *state, vci_initexpr_t inittype)
{
	int			nargs = list_length(args);
	AclResult	aclresult;
	FmgrInfo   *flinfo;
	FunctionCallInfo fcinfo;
	int			argno;
	ListCell   *lc;

	/* Check permission to call function */
	aclresult = object_aclcheck(ProcedureRelationId, funcid, GetUserId(), ACL_EXECUTE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, OBJECT_FUNCTION, get_func_name(funcid));
	InvokeFunctionExecuteHook(funcid);

	/*
	 * Safety check on nargs.  Under normal circumstances this should never
	 * fail, as parser should check sooner.  But possibly it might fail if
	 * server has been compiled with FUNC_MAX_ARGS smaller than some functions
	 * declared in pg_proc?
	 */
	if (nargs > FUNC_MAX_ARGS)
		ereport(ERROR,
				(errcode(ERRCODE_TOO_MANY_ARGUMENTS),
				 errmsg_plural("cannot pass more than %d argument to a function",
							   "cannot pass more than %d arguments to a function",
							   FUNC_MAX_ARGS,
							   FUNC_MAX_ARGS)));

	/* Allocate function lookup data and parameter workspace for this call */
	scratch->d.func.finfo = palloc0_object(FmgrInfo);
	scratch->d.func.fcinfo_data = palloc0(SizeForFunctionCallInfo(nargs));
	flinfo = scratch->d.func.finfo;
	fcinfo = scratch->d.func.fcinfo_data;

	/* Set up the primary fmgr lookup information */
	fmgr_info(funcid, flinfo);
	fmgr_info_set_expr((Node *) node, flinfo);

	/* Initialize function call parameter structure too */
	InitFunctionCallInfoData(*fcinfo, flinfo,
							 nargs, inputcollid, NULL, NULL);

	/* Keep extra copies of this info to save an indirection at runtime */
	scratch->d.func.fn_addr = flinfo->fn_addr;
	scratch->d.func.nargs = nargs;

	/* We only support non-set functions here */
	if (flinfo->fn_retset)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("set-valued function called in context that cannot accept a set"),
				 parent ? executor_errposition(parent->state,
											   exprLocation((Node *) node)) : 0));

	/* Build code to evaluate arguments directly into the fcinfo struct */
	argno = 0;
	foreach(lc, args)
	{
		Expr	   *arg = (Expr *) lfirst(lc);

		if (IsA(arg, Const))
		{
			/*
			 * Don't evaluate const arguments every round; especially
			 * interesting for constants in comparisons.
			 */
			Const	   *con = (Const *) arg;

			fcinfo->args[argno].value = con->constvalue;
			fcinfo->args[argno].isnull = con->constisnull;
		}
		else
		{
			VciExecInitExprRec(arg, parent, state,
							   &fcinfo->args[argno].value, &fcinfo->args[argno].isnull, inittype);
		}
		argno++;
	}

	/*
	 * Insert appropriate opcode depending on strictness and stats level.
	 * The *_FUSAGE variants additionally record pg_stat function usage.
	 */
	if (pgstat_track_functions <= flinfo->fn_stats)
	{
		if (flinfo->fn_strict && nargs > 0)
		{
			/* Choose nargs optimized implementation if available. */
			if (nargs == 1)
				scratch->opcode = EEOP_FUNCEXPR_STRICT_1;
			else if (nargs == 2)
				scratch->opcode = EEOP_FUNCEXPR_STRICT_2;
			else
				scratch->opcode = EEOP_FUNCEXPR_STRICT;
		}
		else
			scratch->opcode = EEOP_FUNCEXPR;
	}
	else
	{
		if (flinfo->fn_strict && nargs > 0)
			scratch->opcode = EEOP_FUNCEXPR_STRICT_FUSAGE;
		else
			scratch->opcode = EEOP_FUNCEXPR_FUSAGE;
	}
}

/* ----------------------------------------------------------------
 *		ExecQual / ExecTargetList / ExecProject
 * ----------------------------------------------------------------
 */

/**
 * ExecProject
 *
 * projects a tuple based on projection info and stores
 * it in the previously specified tuple table slot.
 *
 * Note: the result is always a virtual tuple; therefore it
 * may reference the contents of the exprContext's scan tuples
 * and/or temporary results constructed in the exprContext.
 * If the caller wishes the result to be valid longer than that
 * data will be valid, he must call ExecMaterializeSlot on the
 * result slot.
 *
 * copied from src/include/executor/executor.h
 */
TupleTableSlot *
VciExecProject(VciProjectionInfo *projInfo)
{
	ExprContext *econtext = projInfo->pi_exprContext;
	ExprState  *state = &projInfo->pi_state;
	TupleTableSlot *slot = state->resultslot;
	bool		isnull;

	/*
	 * Clear any former contents of the result slot. This makes it safe for
	 * us to use the slot's Datum/isnull arrays as workspace.
+ */ + ExecClearTuple(slot); + + /* Run the expression, discarding scalar result from the last column. */ + (void) ExecEvalExprSwitchContext(state, econtext, &isnull); + + /* + * Successfully formed a result row. Mark the result slot as containing a + * valid virtual tuple (inlined version of ExecStoreVirtualTuple()). + */ + slot->tts_flags &= ~TTS_FLAG_EMPTY; + slot->tts_nvalid = slot->tts_tupleDescriptor->natts; + + return slot; +} + +/** + * Generate projection based on target list + * + * @param[in] targetlist Target list + * @param[in] econtext Execution context + * @param[in] slot + * @param[in] inputDesc + * @return VciProjectionInfo type projection + */ +VciProjectionInfo * +VciExecBuildProjectionInfo(List *targetList, + ExprContext *econtext, + TupleTableSlot *slot, + PlanState *parent, + TupleDesc inputDesc) +{ + VciProjectionInfo *projInfo; + ExprState *state; + ExprEvalStep scratch; + ListCell *lc; + int len = ExecTargetListLength(targetList); + int numSimpleVars; + bool directMap; + int exprlist_len; + int tle_id; + int *workspace; + int *varNumbers; + int *varOutputCols; + + projInfo = palloc0_object(VciProjectionInfo); + projInfo->pi_slotMap = palloc0_array(VciProjectionInfoSlot, len); + projInfo->pi_tle_array = palloc0_array(TargetEntry *, len); + + /* since these are all int arrays, we need do just one palloc */ + workspace = palloc_array(int, len * 2); + projInfo->pi_varNumbers = varNumbers = workspace; + projInfo->pi_varOutputCols = varOutputCols = workspace + len; + + projInfo->pi_exprContext = econtext; + /* We embed ExprState into ProjectionInfo instead of doing extra palloc */ + projInfo->pi_state.type = T_ExprState; + state = &projInfo->pi_state; + state->expr = (Expr *) targetList; + state->resultslot = slot; + + numSimpleVars = 0; + tle_id = 0; + exprlist_len = 0; + directMap = true; + + /* Insert setup steps as needed */ + ExecCreateExprSetupSteps(state, (Node *) targetList); + + /* Now compile each tlist column */ + foreach(lc, 
targetList) + { + TargetEntry *tle = lfirst_node(TargetEntry, lc); + Var *variable = NULL; + AttrNumber attnum = 0; + bool isSafeVar = false; + + /* + * If tlist expression is a safe non-system Var, use the fast-path + * ASSIGN_*_VAR opcodes. "Safe" means that we don't need to apply + * CheckVarSlotCompatibility() during plan startup. If a source slot + * was provided, we make the equivalent tests here; if a slot was not + * provided, we assume that no check is needed because we're dealing + * with a non-relation-scan-level expression. + */ + if (tle->expr != NULL && + IsA(tle->expr, Var) && + ((Var *) tle->expr)->varattno > 0) + { + /* Non-system Var, but how safe is it? */ + variable = (Var *) tle->expr; + attnum = variable->varattno; + + if (inputDesc == NULL) + isSafeVar = true; /* can't check, just assume OK */ + else if (attnum <= inputDesc->natts) + { + Form_pg_attribute attr = TupleDescAttr(inputDesc, attnum - 1); + + /* + * If user attribute is dropped or has a type mismatch, don't + * use ASSIGN_*_VAR. Instead let the normal expression + * machinery handle it (which'll possibly error out). + */ + if (!attr->attisdropped && variable->vartype == attr->atttypid) + { + isSafeVar = true; + } + } + } + + if (isSafeVar) + { + varNumbers[numSimpleVars] = attnum; + varOutputCols[numSimpleVars] = tle->resno; + + if (tle->resno != numSimpleVars + 1) + directMap = false; + + /* Fast-path: just generate an EEOP_ASSIGN_*_VAR step */ + switch (variable->varno) + { + case INNER_VAR: + /* get the tuple from the inner node */ + scratch.opcode = EEOP_ASSIGN_INNER_VAR; + break; + + case OUTER_VAR: + /* get the tuple from the outer node */ + scratch.opcode = EEOP_ASSIGN_OUTER_VAR; + break; + + /* INDEX_VAR is handled by default case */ + + default: + + /* + * Get the tuple from the relation being scanned, or the + * old/new tuple slot, if old/new values were requested. 
+ */ + switch (variable->varreturningtype) + { + case VAR_RETURNING_DEFAULT: + scratch.opcode = EEOP_ASSIGN_SCAN_VAR; + break; + case VAR_RETURNING_OLD: + scratch.opcode = EEOP_ASSIGN_OLD_VAR; + state->flags |= EEO_FLAG_HAS_OLD; + break; + case VAR_RETURNING_NEW: + scratch.opcode = EEOP_ASSIGN_NEW_VAR; + state->flags |= EEO_FLAG_HAS_NEW; + break; + } + break; + } + + scratch.d.assign_var.attnum = attnum - 1; + scratch.d.assign_var.resultnum = tle->resno - 1; + ExprEvalPushStep(state, &scratch); + + projInfo->pi_slotMap[tle_id].is_simple_var = true; + projInfo->pi_slotMap[tle_id].data.simple_var.relid = variable->varno; + projInfo->pi_slotMap[tle_id].data.simple_var.attno = variable->varattno; + + numSimpleVars++; + } + else + { + /* + * Otherwise, compile the column expression normally. + * + * We can't tell the expression to evaluate directly into the + * result slot, as the result slot (and the exprstate for that + * matter) can change between executions. We instead evaluate + * into the ExprState's resvalue/resnull and then move. + */ + VciExecInitExprRec(tle->expr, parent, state, + &state->resvalue, &state->resnull, VCI_INIT_EXPR_NORMAL); + + /* + * Column might be referenced multiple times in upper nodes, so + * force value to R/O - but only if it could be an expanded datum. 
+ */ + if (get_typlen(exprType((Node *) tle->expr)) == -1) + scratch.opcode = EEOP_ASSIGN_TMP_MAKE_RO; + else + scratch.opcode = EEOP_ASSIGN_TMP; + scratch.d.assign_tmp.resultnum = tle->resno - 1; + ExprEvalPushStep(state, &scratch); + + /* Not a simple variable, add it to generic targetlist */ + projInfo->pi_tle_array[exprlist_len] = tle; + + projInfo->pi_slotMap[tle_id].is_simple_var = false; + projInfo->pi_slotMap[tle_id].data.expr.expr_id = exprlist_len; + + exprlist_len++; + } + + tle_id++; + } + + projInfo->pi_tle_array_len = exprlist_len; + + projInfo->pi_numSimpleVars = numSimpleVars; + projInfo->pi_directMap = directMap; + + if (projInfo->pi_tle_array == 0) + projInfo->pi_tle_array = NULL; + + scratch.opcode = EEOP_DONE_RETURN; + ExprEvalPushStep(state, &scratch); + + ExecReadyExpr(state); + + return projInfo; +} + +/* + * Push steps to evaluate a JsonExpr and its various subsidiary expressions. + */ +static void +VciExecInitJsonExpr(JsonExpr *jsexpr, PlanState *parent, ExprState *state, + Datum *resv, bool *resnull, + ExprEvalStep *scratch, vci_initexpr_t inittype) +{ + JsonExprState *jsestate = palloc0_object(JsonExprState); + ListCell *argexprlc; + ListCell *argnamelc; + List *jumps_return_null = NIL; + List *jumps_to_end = NIL; + ListCell *lc; + ErrorSaveContext *escontext; + bool returning_domain = + get_typtype(jsexpr->returning->typid) == TYPTYPE_DOMAIN; + + Assert(jsexpr->on_error != NULL); + + jsestate->jsexpr = jsexpr; + + /* + * Evaluate formatted_expr storing the result into + * jsestate->formatted_expr. 
+ */ + VciExecInitExprRec((Expr *) jsexpr->formatted_expr, parent, state, + &jsestate->formatted_expr.value, + &jsestate->formatted_expr.isnull, inittype); + + /* JUMP to return NULL if formatted_expr evaluates to NULL */ + jumps_return_null = lappend_int(jumps_return_null, state->steps_len); + scratch->opcode = EEOP_JUMP_IF_NULL; + scratch->resnull = &jsestate->formatted_expr.isnull; + scratch->d.jump.jumpdone = -1; /* set below */ + ExprEvalPushStep(state, scratch); + + /* + * Evaluate pathspec expression storing the result into + * jsestate->pathspec. + */ + VciExecInitExprRec((Expr *) jsexpr->path_spec, parent, state, + &jsestate->pathspec.value, + &jsestate->pathspec.isnull, inittype); + + /* JUMP to return NULL if path_spec evaluates to NULL */ + jumps_return_null = lappend_int(jumps_return_null, state->steps_len); + scratch->opcode = EEOP_JUMP_IF_NULL; + scratch->resnull = &jsestate->pathspec.isnull; + scratch->d.jump.jumpdone = -1; /* set below */ + ExprEvalPushStep(state, scratch); + + /* Steps to compute PASSING args. */ + jsestate->args = NIL; + forboth(argexprlc, jsexpr->passing_values, + argnamelc, jsexpr->passing_names) + { + Expr *argexpr = (Expr *) lfirst(argexprlc); + String *argname = lfirst_node(String, argnamelc); + JsonPathVariable *var = palloc_object(JsonPathVariable); + + var->name = argname->sval; + var->typid = exprType((Node *) argexpr); + var->typmod = exprTypmod((Node *) argexpr); + + VciExecInitExprRec((Expr *) argexpr, parent, state, &var->value, &var->isnull, inittype); + + jsestate->args = lappend(jsestate->args, var); + } + + /* Step for jsonpath evaluation; see ExecEvalJsonExprPath(). */ + scratch->opcode = EEOP_JSONEXPR_PATH; + scratch->resvalue = resv; + scratch->resnull = resnull; + scratch->d.jsonexpr.jsestate = jsestate; + ExprEvalPushStep(state, scratch); + + /* + * Step to return NULL after jumping to skip the EEOP_JSONEXPR_PATH step + * when either formatted_expr or pathspec is NULL. 
Adjust jump target + * addresses of JUMPs that we added above. + */ + foreach(lc, jumps_return_null) + { + ExprEvalStep *as = &state->steps[lfirst_int(lc)]; + + as->d.jump.jumpdone = state->steps_len; + } + scratch->opcode = EEOP_CONST; + scratch->resvalue = resv; + scratch->resnull = resnull; + scratch->d.constval.value = (Datum) 0; + scratch->d.constval.isnull = true; + ExprEvalPushStep(state, scratch); + + escontext = jsexpr->on_error->btype != JSON_BEHAVIOR_ERROR ? + &jsestate->escontext : NULL; + + /* + * To handle coercion errors softly, use the following ErrorSaveContext to + * pass to VciExecInitExprRec() when initializing the coercion expressions + * and in the EEOP_JSONEXPR_COERCION step. + */ + jsestate->escontext.type = T_ErrorSaveContext; + + /* + * Steps to coerce the result value computed by EEOP_JSONEXPR_PATH or the + * NULL returned on NULL input as described above. + */ + jsestate->jump_eval_coercion = -1; + if (jsexpr->use_json_coercion) + { + + jsestate->jump_eval_coercion = state->steps_len; + + VciExecInitJsonCoercion(state, jsexpr->returning, escontext, + jsexpr->omit_quotes, + jsexpr->op == JSON_EXISTS_OP, + resv, resnull); + } + else if (jsexpr->use_io_coercion) + { + /* + * Here we only need to initialize the FunctionCallInfo for the target + * type's input function, which is called by ExecEvalJsonExprPath() + * itself, so no additional step is necessary. + */ + Oid typinput; + Oid typioparam; + FmgrInfo *finfo; + FunctionCallInfo fcinfo; + + getTypeInputInfo(jsexpr->returning->typid, &typinput, &typioparam); + finfo = palloc0_object(FmgrInfo); + fcinfo = palloc0(SizeForFunctionCallInfo(3)); + fmgr_info(typinput, finfo); + fmgr_info_set_expr((Node *) jsexpr->returning, finfo); + InitFunctionCallInfoData(*fcinfo, finfo, 3, InvalidOid, NULL, NULL); + + /* + * We can preload the second and third arguments for the input + * function, since they're constants. 
+ */ + fcinfo->args[1].value = ObjectIdGetDatum(typioparam); + fcinfo->args[1].isnull = false; + fcinfo->args[2].value = Int32GetDatum(jsexpr->returning->typmod); + fcinfo->args[2].isnull = false; + fcinfo->context = (Node *) escontext; + + jsestate->input_fcinfo = fcinfo; + } + + /* + * Add a special step, if needed, to check if the coercion evaluation ran + * into an error but was not thrown because the ON ERROR behavior is not + * ERROR. It will set jsestate->error if an error did occur. + */ + if (jsestate->jump_eval_coercion >= 0 && escontext != NULL) + { + scratch->opcode = EEOP_JSONEXPR_COERCION_FINISH; + scratch->d.jsonexpr.jsestate = jsestate; + ExprEvalPushStep(state, scratch); + } + + jsestate->jump_empty = jsestate->jump_error = -1; + + /* + * Step to check jsestate->error and return the ON ERROR expression if + * there is one. This handles both the errors that occur during jsonpath + * evaluation in EEOP_JSONEXPR_PATH and subsequent coercion evaluation. + * + * Speed up common cases by avoiding extra steps for a NULL-valued ON + * ERROR expression unless RETURNING a domain type, where constraints must + * be checked. ExecEvalJsonExprPath() already returns NULL on error, + * making additional steps unnecessary in typical scenarios. Note that the + * default ON ERROR behavior for JSON_VALUE() and JSON_QUERY() is to + * return NULL. + */ + if (jsexpr->on_error->btype != JSON_BEHAVIOR_ERROR && + (!(IsA(jsexpr->on_error->expr, Const) && + ((Const *) jsexpr->on_error->expr)->constisnull) || + returning_domain)) + { + ErrorSaveContext *saved_escontext; + + jsestate->jump_error = state->steps_len; + + /* JUMP to end if false, that is, skip the ON ERROR expression. 
*/ + jumps_to_end = lappend_int(jumps_to_end, state->steps_len); + scratch->opcode = EEOP_JUMP_IF_NOT_TRUE; + scratch->resvalue = &jsestate->error.value; + scratch->resnull = &jsestate->error.isnull; + scratch->d.jump.jumpdone = -1; /* set below */ + ExprEvalPushStep(state, scratch); + + /* + * Steps to evaluate the ON ERROR expression; handle errors softly to + * rethrow them in COERCION_FINISH step that will be added later. + */ + saved_escontext = state->escontext; + state->escontext = escontext; + VciExecInitExprRec((Expr *) jsexpr->on_error->expr, parent, + state, resv, resnull, inittype); + state->escontext = saved_escontext; + + /* Step to coerce the ON ERROR expression if needed */ + if (jsexpr->on_error->coerce) + VciExecInitJsonCoercion(state, jsexpr->returning, escontext, jsexpr->omit_quotes, false, resv, + resnull); + + /* + * Add a COERCION_FINISH step to check for errors that may occur when + * coercing and rethrow them. + */ + if (jsexpr->on_error->coerce || + IsA(jsexpr->on_error->expr, CoerceViaIO) || + IsA(jsexpr->on_error->expr, CoerceToDomain)) + { + scratch->opcode = EEOP_JSONEXPR_COERCION_FINISH; + scratch->resvalue = resv; + scratch->resnull = resnull; + scratch->d.jsonexpr.jsestate = jsestate; + ExprEvalPushStep(state, scratch); + } + + /* JUMP to end to skip the ON EMPTY steps added below. */ + jumps_to_end = lappend_int(jumps_to_end, state->steps_len); + scratch->opcode = EEOP_JUMP; + scratch->d.jump.jumpdone = -1; + ExprEvalPushStep(state, scratch); + } + + /* + * Step to check jsestate->empty and return the ON EMPTY expression if + * there is one. + * + * See the comment above for details on the optimization for NULL-valued + * expressions. 
+ */ + if (jsexpr->on_empty != NULL && + jsexpr->on_empty->btype != JSON_BEHAVIOR_ERROR && + (!(IsA(jsexpr->on_empty->expr, Const) && + ((Const *) jsexpr->on_empty->expr)->constisnull) || + returning_domain)) + { + ErrorSaveContext *saved_escontext; + + jsestate->jump_empty = state->steps_len; + + /* JUMP to end if false, that is, skip the ON EMPTY expression. */ + jumps_to_end = lappend_int(jumps_to_end, state->steps_len); + scratch->opcode = EEOP_JUMP_IF_NOT_TRUE; + scratch->resvalue = &jsestate->empty.value; + scratch->resnull = &jsestate->empty.isnull; + scratch->d.jump.jumpdone = -1; /* set below */ + ExprEvalPushStep(state, scratch); + + /* + * Steps to evaluate the ON EMPTY expression; handle errors softly to + * rethrow them in COERCION_FINISH step that will be added later. + */ + saved_escontext = state->escontext; + state->escontext = escontext; + VciExecInitExprRec((Expr *) jsexpr->on_empty->expr, parent, + state, resv, resnull, inittype); + state->escontext = saved_escontext; + + /* Step to coerce the ON EMPTY expression if needed */ + if (jsexpr->on_empty->coerce) + VciExecInitJsonCoercion(state, jsexpr->returning, escontext, jsexpr->omit_quotes, false, resv, + resnull); + + /* + * Add a COERCION_FINISH step to check for errors that may occur when + * coercing and rethrow them. + */ + if (jsexpr->on_empty->coerce || + IsA(jsexpr->on_empty->expr, CoerceViaIO) || + IsA(jsexpr->on_empty->expr, CoerceToDomain)) + { + scratch->opcode = EEOP_JSONEXPR_COERCION_FINISH; + scratch->resvalue = resv; + scratch->resnull = resnull; + scratch->d.jsonexpr.jsestate = jsestate; + ExprEvalPushStep(state, scratch); + } + } + + foreach(lc, jumps_to_end) + { + ExprEvalStep *as = &state->steps[lfirst_int(lc)]; + + as->d.jump.jumpdone = state->steps_len; + } + + jsestate->jump_end = state->steps_len; +} + +/* + * Initialize a EEOP_JSONEXPR_COERCION step to coerce the value given in resv + * to the given RETURNING type. 
+ */ +static void +VciExecInitJsonCoercion(ExprState *state, JsonReturning *returning, + ErrorSaveContext *escontext, bool omit_quotes, + bool exists_coerce, + Datum *resv, bool *resnull) +{ + ExprEvalStep scratch = {0}; + + /* For json_populate_type() */ + scratch.opcode = EEOP_JSONEXPR_COERCION; + scratch.resvalue = resv; + scratch.resnull = resnull; + scratch.d.jsonexpr_coercion.targettype = returning->typid; + scratch.d.jsonexpr_coercion.targettypmod = returning->typmod; + scratch.d.jsonexpr_coercion.json_coercion_cache = NULL; + scratch.d.jsonexpr_coercion.escontext = escontext; + scratch.d.jsonexpr_coercion.omit_quotes = omit_quotes; + scratch.d.jsonexpr_coercion.exists_coerce = exists_coerce; + scratch.d.jsonexpr_coercion.exists_cast_to_int = exists_coerce && + getBaseType(returning->typid) == INT4OID; + scratch.d.jsonexpr_coercion.exists_check_domain = exists_coerce && + DomainHasConstraints(returning->typid); + ExprEvalPushStep(state, &scratch); +} diff --git a/contrib/vci/executor/vci_fetch_column_store.c b/contrib/vci/executor/vci_fetch_column_store.c new file mode 100644 index 0000000..a9bd7c7 --- /dev/null +++ b/contrib/vci/executor/vci_fetch_column_store.c @@ -0,0 +1,1193 @@ +/*------------------------------------------------------------------------- + * + * vci_fetch_column_store.c + * Routine to fetch from column store + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/executor/vci_fetch_column_store.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/xact.h" /* for IsolationIsSerializable */ +#include "access/xlog.h" /* for RecoveryInProgress() */ +#include "access/xlogrecovery.h" +#include "catalog/pg_type.h" +#include "executor/execExpr.h" +#include "executor/executor.h" /* for EXEC_FLAG_BACKWARD */ +#include "miscadmin.h" +#include "nodes/execnodes.h" +#include "nodes/nodes.h" +#include "nodes/plannodes.h" 
+#include "storage/ipc.h" /* for before_shmem_exit() */ +#include "storage/lwlock.h" +#include "tcop/pquery.h" /* for ActivePortal */ +#include "utils/cash.h" +#include "utils/date.h" +#include "utils/elog.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/timestamp.h" + +#include "vci.h" + +#include "vci_executor.h" +#include "vci_fetch.h" +#include "vci_mem.h" +#include + +#if (!defined(WIN32)) + +/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, + * if you want the limit (max/min) macros for int types. + */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS 1 +#endif + +#include + +#else +typedef signed char int8_t; +typedef signed short int16_t; +typedef signed int int32_t; +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +typedef signed long long int64_t; +typedef unsigned long long uint64_t; + +/* Limits of integral types. */ +#ifndef INT8_MIN +#define INT8_MIN (-128) +#endif +#ifndef INT16_MIN +#define INT16_MIN (-32767-1) +#endif +#ifndef INT32_MIN +#define INT32_MIN (-2147483647-1) +#endif +#ifndef INT8_MAX +#define INT8_MAX (127) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif +#ifndef INT32_MAX +#define INT32_MAX (2147483647) +#endif +#ifndef UINT8_MAX +#define UINT8_MAX (255U) +#endif +#ifndef UINT16_MAX +#define UINT16_MAX (65535U) +#endif +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295U) +#endif + +#endif /* ! C99 */ +/** + * Used to search for VCI Scan in the plan tree with search_vci_scan_walker(). 
+ */
+typedef struct
+{
+    List       *scan_list;      /* Record discovered VciScan in a list */
+} vci_search_vci_scan_context_t;
+
+/**
+ * Store data struct for each query
+ */
+vci_query_context_t *vci_query_context;
+
+static void initialize_query_context(PlannedStmt *target, MemoryContext smccontext);
+static bool search_vci_scan_walker(Plan *plan, void *context);
+static void aggregate_attr_used(List *scan_list);
+static void output_local_ros_size(vci_CSQueryContext query_context);
+static void enter_standby_query(void);
+static void exit_standby_query(void);
+static void prepare_query_contexts(bool recoveryInProgress, bool estimatingLocalROSSize);
+static bool estimate_and_check_localROS_size(void);
+static void shutdown_standby_query(int code, Datum arg);
+static bool create_all_queries_context_for_fetching_column_store(QueryDesc *queryDesc, int eflags);
+static void create_attr_map(VciScanState *scanstate, VciScan *scan, int *num_attrs_p, AttrNumber **attrNumArray_p);
+static void initialize_one_fetch_context_for_fetching_column_store(VciScanState *scanstate, vci_index_placeholder_t *index_ph);
+
+/* true while this backend holds a slot in num_standby_exec_queries */
+static bool is_running_standby_query;
+/* true once shutdown_standby_query has been registered via before_shmem_exit */
+static bool shutdown_standby_query_registered;
+
+/**
+ * Initialize query context required for column store fetch
+ *
+ * Attempt to rewrite plan for each query, and if successful, initialize
+ * resources necessary to execute custom plan.
+ *
+ * @param[in,out] queryDesc query description to be rewritten
+ * @param[in] eflags Execution flag
+ *
+ * @note If plan rewrite is successful, per-query SMC is constructed, and the
+ * rewritten plan is stored in queryDesc->plannedstmt.
+ * Also, vci_query_context_t will be generated in vci_query_context.
+ */ +void +vci_initialize_query_context(QueryDesc *queryDesc, int eflags) +{ + PlannedStmt *orig_stmt; + PlannedStmt *target; + MemoryContext tmpcontext; + MemoryContext oldcontext; + MemoryContext smccontext; + + /* + * When a previous query was failed, vci_query_context may be a dangling + * pointer. We'll only set vci_query_context to NULL but mustn't access + * the memory content pointed by vci_query_context. + */ + vci_query_context = NULL; + + /* + * In standalone mode or bootstrap mode, disable VCI execution. + */ + if (!IsPostmasterEnvironment) + return; + + if (!VciGuc.enable) + return; + + orig_stmt = queryDesc->plannedstmt; + + /* + * Custom plan is only for SELECT command. For other commands, plan tree + * rewrite won't be performed. + */ + if (orig_stmt->commandType != CMD_SELECT) + return; + + /* + * Stop if isolation level is serializable + */ + if (IsolationIsSerializable()) + return; + + /* + * Stop if full_page_writes is off + */ + if (!fullPageWrites) + return; + + /* + * Stop if WITH HOLD is specified in DECLARE command + */ + if (ActivePortal && + (ActivePortal->cursorOptions & (CURSOR_OPT_HOLD))) + return; + + /* + * Stop if SCROLL is specified in DECLARE command or SCROLL/NO SCROLL is + * not specified but SCROLL effect is applied internally + */ + if (eflags & EXEC_FLAG_BACKWARD) + return; + + /* + * Stop if plan cost estimate is less than threshold + */ + if ((orig_stmt->planTree == NULL) || + (orig_stmt->planTree->total_cost < (Cost) VciGuc.cost_threshold)) + return; + + elog(DEBUG1, "Call vci_initialize_query_context()"); + + tmpcontext = AllocSetContextCreate(CurrentMemoryContext, + "VCI Temporary Rewrite Plan", + ALLOCSET_DEFAULT_SIZES); + + oldcontext = MemoryContextSwitchTo(tmpcontext); + + target = vci_generate_custom_plan(orig_stmt, eflags, queryDesc->snapshot); + + MemoryContextSwitchTo(oldcontext); + + if (!target) + goto done; + + smccontext = AllocSetContextCreate(TopTransactionContext, "VCI Query", + ALLOCSET_DEFAULT_SIZES); + 
+    oldcontext = MemoryContextSwitchTo(smccontext);
+
+    /* To rewrite plan, memory is allocated in tmpcontext, but move it to SMC */
+    target = copyObjectImpl(target);
+
+    initialize_query_context(target, smccontext);
+
+    if (create_all_queries_context_for_fetching_column_store(queryDesc, eflags))
+    {
+        /* Environment build is successful, so rewrite the plan */
+        queryDesc->plannedstmt = target;
+    }
+    else
+    {
+        /* Environment build failed */
+        MemoryContextSwitchTo(oldcontext);
+        vci_finalize_query_context();
+
+        goto done;
+    }
+
+    vci_query_context->plannedstmt = target;
+
+    vci_query_context->max_plan_info_entries = VCI_INIT_PLAN_INFO_ENTRIES;
+    vci_query_context->plan_info_map = palloc0_array(vci_plan_info_t, VCI_INIT_PLAN_INFO_ENTRIES);
+
+    vci_query_context->lock = VciShmemAddr->vci_query_context_lock;
+
+    MemoryContextSwitchTo(oldcontext);
+
+done:
+    MemoryContextDelete(tmpcontext);
+}
+
+/*
+ * Allocate vci_query_context and collect the VCI Scan nodes of the rewritten
+ * plan tree so their referenced attributes can be aggregated per index.
+ */
+static void
+initialize_query_context(PlannedStmt *target, MemoryContext smccontext)
+{
+    vci_search_vci_scan_context_t scontext;
+
+    vci_query_context = palloc0_object(vci_query_context_t);
+
+    vci_query_context->mcontext = smccontext;
+
+    scontext.scan_list = NIL;
+    vci_plannedstmt_tree_walker(target, search_vci_scan_walker, NULL, &scontext);
+    aggregate_attr_used(scontext.scan_list);
+    list_free(scontext.scan_list);
+}
+
+/*
+ * Plan-tree walker: record every column-store-mode VCI Scan in
+ * scontext->scan_list.  Returns false (stop descending) once a matching
+ * scan node is recorded.
+ */
+static bool
+search_vci_scan_walker(Plan *plan, void *context)
+{
+    vci_search_vci_scan_context_t *scontext;
+
+    scontext = (vci_search_vci_scan_context_t *) context;
+
+    if (plan && (IsA(plan, CustomScan) || IsA(plan, CustomPlanMarkPos)))
+    {
+        uint32      plan_type = ((CustomScan *) plan)->flags & VCI_CUSTOMPLAN_MASK;
+
+        if (plan_type == VCI_CUSTOMPLAN_SCAN)
+        {
+            VciScan    *scan = (VciScan *) plan;
+
+            if (scan->scan_mode == VCI_SCAN_MODE_COLUMN_STORE)
+            {
+                scontext->scan_list = lappend(scontext->scan_list, plan);
+                return false;
+            }
+        }
+    }
+
+    return vci_plan_tree_walker(plan, search_vci_scan_walker, context);
+}
+
+/**
+ * If there are multiple VCI Scan
in the same table, OR of the referenced attributes is taken. + * This is required to pass to vci_CSCreateQueryContext(). + */ +static void +aggregate_attr_used(List *scan_list) +{ + int i; + int uniq_vci_indexes = 0; + List *uniq_oid_list = NIL; + ListCell *outer, + *inner; + + /* + * Calculate number of unique VCI indexes referenced from query + */ + foreach(outer, scan_list) + { + bool match = false; + VciScan *scan = (VciScan *) lfirst(outer); + + foreach(inner, uniq_oid_list) + { + if (scan->indexoid == lfirst_oid(inner)) + { + match = true; + break; + } + } + + if (match) + continue; + else + { + uniq_oid_list = lappend_oid(uniq_oid_list, scan->indexoid); + uniq_vci_indexes++; + } + } + + elog(DEBUG1, "# of unique VCI indexes = %d", uniq_vci_indexes); + + /* uniq_vci_indexes can be 0 */ + + vci_query_context->num_indexes = uniq_vci_indexes; + vci_query_context->index_ph_table = palloc0_array(vci_index_placeholder_t, uniq_vci_indexes); + + i = 0; + foreach(outer, uniq_oid_list) + vci_query_context->index_ph_table[i++].indexoid = lfirst_oid(outer); + + list_free(uniq_oid_list); + uniq_oid_list = NIL; + + for (i = 0; i < uniq_vci_indexes; i++) + { + vci_index_placeholder_t *index_ph; + + index_ph = &vci_query_context->index_ph_table[i]; + + foreach(outer, scan_list) + { + VciScan *scan = (VciScan *) lfirst(outer); + + if (scan->indexoid == index_ph->indexoid) + { + index_ph->attr_used = bms_add_members(index_ph->attr_used, + scan->attr_used); + + scan->index_ph_id = i + 1; + scan->fetch_ph_id = ++index_ph->num_fetches; + } + } + + index_ph->fetch_ph_table = palloc0_array(vci_fetch_placeholder_t, index_ph->num_fetches); + } +} + +/** + * Free query context for VCI execution + * + * Release if query context is secured. + * Free if local SMC of backend process is secured. 
+ */ +void +vci_free_query_context(void) +{ + if (vci_query_context) + { + MemoryContextDelete(vci_query_context->mcontext); + + vci_query_context = NULL; + } +} + +/** + * Determine whether custom plan that performs column store fetch is being executed + * + * @retval true Executing custom plan + * @retval false Not executing custom plan (including interruptions) + */ +bool +vci_is_processing_custom_plan(void) +{ + if (vci_query_context == NULL) + return false; + + return !vci_query_context->has_stopped; +} + +/** + * @description output the Data WOS size and Whiteout WOS size on log. + */ +static void +output_local_ros_size(vci_CSQueryContext query_context) +{ + elog(DEBUG1, + "A local ROS creation for VCI %d failed: Data WOS size = %ld, Whiteout WOS size = %ld", + query_context->main_relation_oid, + (long) query_context->num_data_wos_entries, (long) query_context->num_whiteout_wos_entries); +} + +/** + * Create data necessary for column store fetch. + * Call before executor runs. + */ +static bool +create_all_queries_context_for_fetching_column_store(QueryDesc *queryDesc, int eflags) +{ + bool result = true; + bool recoveryInProgress; + + /* + * In standby server query, ShareUpdateExclusiveLock lock cannot be + * performed during Local ROS creation. Instead, stop streaming + * replication WAL replay. + * + * Multiple queries simultaneously creating Local ROS are counted by + * num_standby_exec_queries. Restart WAL replay at the end of last query. 
+ */ + recoveryInProgress = RecoveryInProgress(); + + if (recoveryInProgress) + enter_standby_query(); + + prepare_query_contexts(recoveryInProgress, true); + if (!estimate_and_check_localROS_size()) + goto error; + + for (int i = 0; i < vci_query_context->num_indexes; i++) + { + vci_index_placeholder_t *index_ph; + + index_ph = &vci_query_context->index_ph_table[i]; + + vci_CSDestroyQueryContext(index_ph->query_context); + index_ph->query_context = NULL; + } + prepare_query_contexts(recoveryInProgress, false); + if (!estimate_and_check_localROS_size()) + goto error; + + /* + * Create Local ROS + */ + PG_TRY(); + { + for (int i = 0; i < vci_query_context->num_indexes; i++) + { + vci_index_placeholder_t *index_ph; + + index_ph = &vci_query_context->index_ph_table[i]; + + index_ph->local_ros = vci_CSGenerateLocalRos(index_ph->query_context); + + Assert(index_ph->local_ros); + } + } + PG_CATCH(); + { + if (geterrcode() == ERRCODE_OUT_OF_MEMORY) + { + /* + * Cancel VCI execution if there is an error due to insufficient + * memory during Local ROS generation. 
+ */ + if (VciGuc.log_query) + elog(WARNING, "out of memory during local ROS generation"); + + for (int i = 0; i < vci_query_context->num_indexes; i++) + { + vci_index_placeholder_t *index_ph; + vci_id_t vciid; + + index_ph = &vci_query_context->index_ph_table[i]; + + vciid.oid = index_ph->indexoid; + vciid.dbid = MyDatabaseId; + + vci_SetForceNextWosRosConvFlag(&vciid, true); + + if (index_ph->query_context) + { + output_local_ros_size(index_ph->query_context); + vci_CSDestroyQueryContext(index_ph->query_context); + index_ph->query_context = NULL; + } + } + + FlushErrorState(); + + result = false; + } + else + { + if (recoveryInProgress) + exit_standby_query(); + + PG_RE_THROW(); + } + } + PG_END_TRY(); + + if (recoveryInProgress) + exit_standby_query(); + + return result; + +error: + for (int i = 0; i < vci_query_context->num_indexes; i++) + { + vci_index_placeholder_t *index_ph; + vci_id_t vciid; + + index_ph = &vci_query_context->index_ph_table[i]; + + vciid.oid = index_ph->indexoid; + vciid.dbid = MyDatabaseId; + + vci_SetForceNextWosRosConvFlag(&vciid, true); + output_local_ros_size(index_ph->query_context); + + vci_CSDestroyQueryContext(index_ph->query_context); + index_ph->query_context = NULL; + } + + if (recoveryInProgress) + exit_standby_query(); + + return false; +} + +static void +enter_standby_query(void) +{ + LWLockAcquire(VciShmemAddr->standby_exec_loc, LW_EXCLUSIVE); + if (VciShmemAddr->num_standby_exec_queries == 0) + SetVciRecoveryPause(); + VciShmemAddr->num_standby_exec_queries++; + LWLockRelease(VciShmemAddr->standby_exec_loc); + + if (!shutdown_standby_query_registered) + { + before_shmem_exit(shutdown_standby_query, 0); + shutdown_standby_query_registered = true; + } + + is_running_standby_query = true; +} + +static void +exit_standby_query(void) +{ + is_running_standby_query = false; + + LWLockAcquire(VciShmemAddr->standby_exec_loc, LW_EXCLUSIVE); + VciShmemAddr->num_standby_exec_queries--; + if (VciShmemAddr->num_standby_exec_queries == 0) 
+        SetRecoveryPause(false);
+    LWLockRelease(VciShmemAddr->standby_exec_loc);
+}
+
+/*
+ * before_shmem_exit callback: release this backend's standby-query slot and
+ * restart WAL replay if it was the last one.  Mirrors exit_standby_query()
+ * but is safe to run when the query was aborted.
+ */
+static void
+shutdown_standby_query(int code, Datum arg)
+{
+    if (!is_running_standby_query)
+        return;
+
+    is_running_standby_query = false;
+
+    LWLockAcquire(VciShmemAddr->standby_exec_loc, LW_EXCLUSIVE);
+    VciShmemAddr->num_standby_exec_queries--;
+    if (VciShmemAddr->num_standby_exec_queries == 0)
+        SetRecoveryPause(false);
+    LWLockRelease(VciShmemAddr->standby_exec_loc);
+}
+
+/**
+ * @description allocate query_contexts for VCIs.
+ * @param[in] recoveryInProgress true if recovery is in progress.
+ * @param[in] estimatingLocalROSSize true if estimating a local ROS size.
+ */
+static void
+prepare_query_contexts(bool recoveryInProgress, bool estimatingLocalROSSize)
+{
+    for (int i = 0; i < vci_query_context->num_indexes; i++)
+    {
+        int         j,
+                    k,
+                    num_attrs;
+        AttrNumber *attrNumArray;
+        vci_index_placeholder_t *index_ph;
+        vci_id_t    vciid;
+
+        index_ph = &vci_query_context->index_ph_table[i];
+
+        num_attrs = bms_num_members(index_ph->attr_used);
+
+        attrNumArray = palloc_array(AttrNumber, num_attrs);
+
+        /* Collect the members of attr_used into a dense AttrNumber array */
+        j = k = 0;
+        do
+        {
+            if (bms_is_member(k, index_ph->attr_used))
+                attrNumArray[j++] = k;
+
+            k++;
+        } while (j < num_attrs);
+
+        /* update memory entry */
+        vciid.oid = index_ph->indexoid;
+        vciid.dbid = MyDatabaseId;
+
+        vci_TouchMemoryEntry(&vciid,
+                             get_rel_tablespace(index_ph->indexoid));
+
+        index_ph->query_context = vci_CSCreateQueryContext(
+                                                           index_ph->indexoid,
+                                                           num_attrs,   /* Number of read columns */
+                                                           attrNumArray,    /* Array of read columns */
+                                                           vci_query_context->mcontext, /* SMC */
+                                                           recoveryInProgress,
+                                                           estimatingLocalROSSize);
+
+        Assert(index_ph->query_context);
+
+        pfree(attrNumArray);
+    }
+}
+
+/**
+ * @description estimate the size of local ROS
+ * @return true if the estimated local ROS size is smaller than the upper bound.
+ */
+static bool
+estimate_and_check_localROS_size(void)
+{
+    Size        total_local_ros_size = 0;
+
+    /*
+     * Estimate Local ROS size
+     */
+    for (int i = 0; i < vci_query_context->num_indexes; i++)
+    {
+        Size        local_ros_size;
+
+        local_ros_size = vci_CSEstimateLocalRosSize(vci_query_context->index_ph_table[i].query_context);
+
+        /* (Size) -1 signals an uncountable/overflowing Data WOS */
+        if (local_ros_size == (Size) -1)
+        {
+            if (VciGuc.log_query)
+                elog(WARNING, "too many rows in Data WOS");
+
+            return false;
+        }
+
+        total_local_ros_size += local_ros_size;
+    }
+
+    /* vci.max_local_ros is configured in kilobytes */
+    if (VciGuc.max_local_ros_size * UINT64CONST(1024) < total_local_ros_size)
+    {
+        if (VciGuc.log_query)
+            elog(WARNING, "could not use VCI: local ROS size (%zu) exceeds vci.max_local_ros (%zu)",
+                 total_local_ros_size, (Size) VciGuc.max_local_ros_size * UINT64CONST(1024));
+
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * Finalize query context required for column store fetch
+ *
+ * @note Object pointed to by vci_query_context is collected.
+ */
+void
+vci_finalize_query_context(void)
+{
+    Assert(vci_query_context);
+
+    elog(DEBUG1, "Call vci_finalize_query_context()");
+
+    /* Tear down in reverse order of construction */
+    for (int i = vci_query_context->num_indexes - 1; i >= 0; i--)
+    {
+        if (vci_query_context->index_ph_table[i].local_ros)
+        {
+            vci_CSDestroyLocalRos(vci_query_context->index_ph_table[i].local_ros);
+            vci_query_context->index_ph_table[i].local_ros = NULL;
+        }
+
+        if (vci_query_context->index_ph_table[i].query_context)
+        {
+            vci_CSDestroyQueryContext(vci_query_context->index_ph_table[i].query_context);
+            vci_query_context->index_ph_table[i].query_context = NULL;
+        }
+    }
+    vci_free_query_context();
+    vci_query_context = NULL;
+}
+
+/**
+ * VCI Scan assigns serial numbers only to attributes read from the table and creates a map.
+ * + * @param[out] scanstate VCI Scan state to be output + * @param[in] scan Original VCI Scan + * @param[out] num_attrs_p Number of attributes to read + * @param[out] attrNumArray_p Map of original attribute number in table -> serial numbers for attributes to be read + * + * @todo The order of function arguments are unnatural + */ +static void +create_attr_map(VciScanState *scanstate, VciScan *scan, int *num_attrs_p, AttrNumber **attrNumArray_p) +{ + int top_attr, + attr_index, + num_attrs; + AttrNumber *attrNumArray; + + num_attrs = bms_num_members(scan->attr_used); + + attrNumArray = palloc_array(AttrNumber, num_attrs); + + top_attr = 1; /* AttrNumber starts from 1 */ + attr_index = 0; + + do + { + if (bms_is_member(top_attr, scan->attr_used)) + attrNumArray[attr_index++] = top_attr; + + top_attr++; + } while (attr_index < num_attrs); + + /* Record the biggest AttrNumber */ + scanstate->last_attr = top_attr - 1; + + /* + * Create a map of column number returned by column store fetch from + * AttrNumber so that searched can be performed from Var. + */ + scanstate->attr_map = palloc0_array(int, (scanstate->last_attr + 1)); + + for (int i = 0; i < num_attrs; i++) + /* Add 1 to the index number so that 0 indicates an invalid value */ + scanstate->attr_map[attrNumArray[i]] = i + 1; + + *num_attrs_p = num_attrs; + *attrNumArray_p = attrNumArray; +} + +/** + * Create data required for a specific VCI Scan to perform column store fetch. + * + * @param[in,out] scanstate Pointer to VCI Scan + * @param[in,out] econtext expression context needed for execution + * + * @note Call only once in ExecInit for VCI Scan. 
+ */ +void +vci_create_one_fetch_context_for_fetching_column_store(VciScanState *scanstate, ExprContext *econtext) +{ + VciScan *scan = (VciScan *) scanstate->vci.css.ss.ps.plan; + vci_index_placeholder_t *index_ph; + vci_fetch_placeholder_t *fetch_ph; + int num_attrs; + AttrNumber *attrNumArray; + + elog(DEBUG1, "Call vci_create_one_fetch_context_for_fetching_column_store()"); + + create_attr_map(scanstate, scan, &num_attrs, &attrNumArray); + + Assert((1 <= scan->index_ph_id) && (scan->index_ph_id <= vci_query_context->num_indexes)); + + index_ph = &vci_query_context->index_ph_table[scan->index_ph_id - 1]; + + scanstate->fetch_context + = vci_CSCreateFetchContext(index_ph->query_context, + VCI_NUM_ROWS_READ_AT_ONCE, + num_attrs, + attrNumArray, + true, /* column store */ + false, /* Do not return TID vector */ + true /* Returns CRID */ ); + + Assert(scanstate->fetch_context); + + pfree(attrNumArray); + + initialize_one_fetch_context_for_fetching_column_store(scanstate, index_ph); + + /* Record status in shared memory area */ + Assert((1 <= scan->fetch_ph_id) && (scan->fetch_ph_id <= index_ph->num_fetches)); + + fetch_ph = &index_ph->fetch_ph_table[scan->fetch_ph_id - 1]; + + fetch_ph->fetch_context = scanstate->fetch_context; + fetch_ph->scanstate = scanstate; +} + +/** + * Parallel background worker copies data necessary for VCI Scan to perform + * column store fetch. 
+ *
+ * @param[in,out] scanstate Pointer to VCI Scan
+ *
+ * @note This is for parallel background worker
+ */
+void
+vci_clone_one_fetch_context_for_fetching_column_store(VciScanState *scanstate)
+{
+    VciScan    *scan = (VciScan *) scanstate->vci.css.ss.ps.plan;
+    vci_index_placeholder_t *index_ph;
+    int         num_attrs;
+    AttrNumber *attrNumArray;
+
+    Assert((1 <= scan->index_ph_id) && (scan->index_ph_id <= vci_query_context->num_indexes));
+
+    index_ph = &vci_query_context->index_ph_table[scan->index_ph_id - 1];
+
+    /* Copy fetch context created on the backend */
+    scanstate->fetch_context = index_ph->fetch_ph_table[scan->fetch_ph_id - 1].fetch_context;
+
+    create_attr_map(scanstate, scan, &num_attrs, &attrNumArray);
+
+    pfree(attrNumArray);
+
+    initialize_one_fetch_context_for_fetching_column_store(scanstate, index_ph);
+
+    /*
+     * first_extent_id, last_extent_id, first_fetch of scanstate of VCI Scan
+     * to be scanned in parallel are reset when the task is received.
+     */
+}
+
+/*
+ * Common per-scan setup: localize the fetch context, create the extent-status
+ * and virtual-tuple buffers, and set the initial extent/CRID scan range
+ * (negative extent ids cover the Local ROS, if any).
+ */
+static void
+initialize_one_fetch_context_for_fetching_column_store(VciScanState *scanstate, vci_index_placeholder_t *index_ph)
+{
+    scanstate->local_fetch_context
+        = vci_CSLocalizeFetchContext(scanstate->fetch_context,
+                                     CurrentMemoryContext);
+
+    scanstate->extent_status
+        = vci_CSCreateCheckExtent(scanstate->local_fetch_context);
+
+    Assert(scanstate->extent_status);
+
+    scanstate->vector_set
+        = vci_CSCreateVirtualTuples(scanstate->local_fetch_context);
+
+    Assert(scanstate->vector_set);
+
+    /* Start scanning from the negative extent id if Local ROS exists */
+    scanstate->first_extent_id = -index_ph->query_context->num_local_ros_extents;
+    scanstate->last_extent_id = index_ph->query_context->num_ros_extents;
+    scanstate->first_crid = (int64) scanstate->first_extent_id * VCI_NUM_ROWS_IN_EXTENT;
+    scanstate->last_crid = (int64) scanstate->last_extent_id * VCI_NUM_ROWS_IN_EXTENT;
+    scanstate->first_fetch = false;
+}
+
+/**
+ * Destroy data required for specific VCI Scan to execute column store fetch.
+ *
+ * @param[in,out] scanstate Pointer to VCI Scan
+ *
+ * @note Call only once in ExecEnd for VCI Scan
+ */
+void
+vci_destroy_one_fetch_context_for_fetching_column_store(VciScanState *scanstate)
+{
+    elog(DEBUG1, "Call vci_destroy_one_fetch_context_for_fetching_column_store()");
+
+    pfree(scanstate->attr_map);
+    scanstate->attr_map = NULL;
+
+    vci_CSDestroyVirtualTuples(scanstate->vector_set);
+    scanstate->vector_set = NULL;
+
+    vci_CSDestroyCheckExtent(scanstate->extent_status);
+    scanstate->extent_status = NULL;
+
+    vci_CSDestroyFetchContext(scanstate->local_fetch_context);
+    scanstate->local_fetch_context = NULL;
+
+    vci_CSDestroyFetchContext(scanstate->fetch_context);
+    scanstate->fetch_context = NULL;
+}
+
+/**
+ * Specify column store read start position to VCI Scan
+ *
+ * @param[in,out] scanstate Pointer to VCI Scan
+ * @param[in] crid_start Read start CRID
+ * @param[in] size Number of rows to read at a time
+ *                 (VCI_NUM_ROWS_IN_EXTENT or less)
+ */
+void
+vci_set_starting_position_for_fetching_column_store(VciScanState *scanstate, int64 crid_start, int size)
+{
+    int64       crid_end = crid_start + size;
+    int32       extent_id;
+
+    /*
+     * Dividing by VCI_NUM_ROWS_IN_EXTENT doesn't work when crid_start is
+     * negative, so bit shift.
+     */
+    extent_id = crid_start >> VCI_CRID_ROW_ID_BIT_WIDTH;
+
+    Assert(crid_end <= (int64) (extent_id + 1) * VCI_NUM_ROWS_IN_EXTENT);
+
+    scanstate->first_extent_id = extent_id;
+    scanstate->last_extent_id = extent_id + 1;
+    scanstate->first_crid = crid_start;
+    scanstate->last_crid = crid_end;
+
+    scanstate->first_fetch = false;
+}
+
+/**
+ * Read vector from column store fetches in VCI Scan.
+ * If there are unread rows in the vector, do nothing.
 *
 * @param[in, out] scanstate   Pointer to VCI Scan
 *
 * @retval false Read all rows in column store
 * @retval true  One or more lines remain to be read
 *
 * @note Before calling this function, initialize settings
 *       such as vci_reset_vector_set_from_column_store() and
 *       vci_set_starting_position_for_fetching_column_store().
 */
bool
vci_fill_vector_set_from_column_store(VciScanState *scanstate)
{
	if (!scanstate->first_fetch)
	{
		int64		crid_start;
		int64		crid_end;
		int64		vector_end;
		vci_extent_status_t *status;
		vci_virtual_tuples_t *vector_set;
		uint16	   *skip_list;

		scanstate->first_fetch = true;

		scanstate->pos.current_extent_id = scanstate->first_extent_id;

		crid_start = scanstate->first_crid;
		crid_end = scanstate->last_crid;

		/* Check first extent */
		status = scanstate->extent_status;

		vci_CSCheckExtent(status,
						  scanstate->local_fetch_context,
						  scanstate->pos.current_extent_id,
						  false);

		/*
		 * NOTE(review): when the first extent is absent or invisible we jump
		 * straight to 'start' without initializing pos.num_fetched_rows /
		 * pos.num_rows_in_extent; this appears to rely on the scan state
		 * being zero-initialized — confirm.
		 */
		if (!status->existence || !status->visible)
			goto start;

		crid_end = Min(crid_end, crid_start + status->num_rows);

		/*
		 * Read first vector: round crid_start up to the next
		 * VCI_MAX_FETCHING_ROWS boundary (power of two), clamped to crid_end.
		 */
		vector_end = (crid_start + VCI_MAX_FETCHING_ROWS) & ~(VCI_MAX_FETCHING_ROWS - 1);

		if (crid_end < vector_end)
			vector_end = crid_end;

		vector_set = scanstate->vector_set;

		scanstate->pos.fetch_starting_crid = crid_start;
		scanstate->pos.num_fetched_rows =
			vci_CSFetchVirtualTuples(vector_set, crid_start, vector_end - crid_start);

		if (scanstate->pos.num_fetched_rows < 1)
			elog(ERROR, "vci_CSFetchVirtualTuples returns %d num_fetched_rows(crid=" INT64_FORMAT ")",
				 scanstate->pos.num_fetched_rows, crid_start);

		/* Offsets within the extent (VCI_NUM_ROWS_IN_EXTENT is a power of two) */
		scanstate->pos.offset_in_extent = (crid_start & (VCI_NUM_ROWS_IN_EXTENT - 1)) + scanstate->pos.num_fetched_rows;
		scanstate->pos.num_rows_in_extent = ((crid_end - 1) & (VCI_NUM_ROWS_IN_EXTENT - 1)) + 1;

		/* skip_list[0] is the first non-skipped row in the fetched vector */
		skip_list = vci_CSGetSkipFromVirtualTuples(vector_set);
		scanstate->pos.current_row = skip_list[0];
	}

start:
	CHECK_FOR_INTERRUPTS();

	if (scanstate->pos.current_row < scanstate->pos.num_fetched_rows)
		/* Can read fetched vectors */
		return true;

	if (scanstate->pos.offset_in_extent < scanstate->pos.num_rows_in_extent)
	{
		/* Read the next vector in the same extent */
		vci_virtual_tuples_t *vector_set;
		int64		crid_start;
		uint16	   *skip_list;

		vector_set = scanstate->vector_set;

		crid_start = (int64) scanstate->pos.current_extent_id * VCI_NUM_ROWS_IN_EXTENT
			+ scanstate->pos.offset_in_extent;

		scanstate->pos.fetch_starting_crid = crid_start;
		scanstate->pos.num_fetched_rows =
			vci_CSFetchVirtualTuples(vector_set, crid_start, VCI_MAX_FETCHING_ROWS);

		if (scanstate->pos.num_fetched_rows < 1)
			elog(ERROR, "vci_CSFetchVirtualTuples returns %d num_fetched_rows(crid=" INT64_FORMAT ")",
				 scanstate->pos.num_fetched_rows, crid_start);

		Assert(vector_set->num_rows > 0);

		scanstate->pos.offset_in_extent += VCI_MAX_FETCHING_ROWS;

		skip_list = vci_CSGetSkipFromVirtualTuples(vector_set);
		scanstate->pos.current_row = skip_list[0];

		goto start;
	}

	/* read next extent */
	while (scanstate->pos.current_extent_id + 1 < scanstate->last_extent_id)
	{
		vci_extent_status_t *status = scanstate->extent_status;
		int64		extent_start;
		int64		extent_end;

		scanstate->pos.current_extent_id++;

		vci_CSCheckExtent(status,
						  scanstate->local_fetch_context,
						  scanstate->pos.current_extent_id,
						  false);

		if (status->existence && status->visible)
		{
			extent_start = (int64) scanstate->pos.current_extent_id * VCI_NUM_ROWS_IN_EXTENT;
			extent_end = Min(extent_start + status->num_rows, scanstate->last_crid);

			scanstate->pos.offset_in_extent = 0;
			scanstate->pos.num_rows_in_extent = extent_end - extent_start;

			/* Re-enter the loop to fetch the first vector of this extent */
			goto start;
		}
	}

	/* Finished read all extent */
	return false;
}

/**
 * Temporarily record the read position of VCI Scan column store.
 *
 * @param[in, out] scanstate   Pointer to VCI Scan
 */
void
vci_mark_pos_vector_set_from_column_store(VciScanState *scanstate)
{
	/* Struct copy of the whole scan position (cheap, fixed size) */
	scanstate->mark = scanstate->pos;
}

/**
 * Return read position of VCI Scan column store to the marked position,
 * and read data again.
 *
 * @param[in, out] scanstate   Pointer to VCI Scan
 */
void
vci_restr_pos_vector_set_from_column_store(VciScanState *scanstate)
{
	/* read next vector in the same extent */
	vci_virtual_tuples_t *vector_set;
	int64		crid_start;

	/* return to marked position */
	scanstate->pos = scanstate->mark;

	/*
	 * Re-read extent.
	 *
	 * NOTE(review): offset_in_extent is rewound by VCI_MAX_FETCHING_ROWS to
	 * recover the CRID of the vector that was current at mark time; this
	 * assumes a full-width fetch had advanced it — confirm against the
	 * fill/advance logic.
	 */
	vector_set = scanstate->vector_set;

	crid_start = (int64) scanstate->pos.current_extent_id * VCI_NUM_ROWS_IN_EXTENT
		+ (scanstate->pos.offset_in_extent - VCI_MAX_FETCHING_ROWS);

	scanstate->pos.fetch_starting_crid = crid_start;
	scanstate->pos.num_fetched_rows =
		vci_CSFetchVirtualTuples(vector_set, crid_start, VCI_MAX_FETCHING_ROWS);

	Assert(vector_set->num_rows > 0);
}

/**
 * When reading 1 row of vector loaded by vci_fill_vector_set_from_column_store(),
 * set the row to be read next to pointer.
 *
 * @param[in, out] scanstate   Pointer to VCI Scan
 */
void
vci_step_next_tuple_from_column_store(VciScanState *scanstate)
{
	vci_virtual_tuples_t *vector_set;
	uint16	   *skip_list;

	vector_set = scanstate->vector_set;
	skip_list = vci_CSGetSkipFromVirtualTuples(vector_set);

	/*
	 * skip_list[r + 1] holds the number of skipped rows between row r and
	 * the next live row; advance past them plus the current row itself.
	 */
	scanstate->pos.current_row += skip_list[scanstate->pos.current_row + 1] + 1;
}

/**
 * Mark the currently loaded vector as fully consumed, so the next fill
 * call moves on instead of returning remaining rows.
 *
 * @param[in, out] scanstate   Pointer to VCI Scan
 */
void
vci_finish_vector_set_from_column_store(VciScanState *scanstate)
{
	scanstate->pos.current_row = scanstate->pos.num_fetched_rows;
}

/**
 * Execute vector process corresponding to target list of VCI Scan
 *
 * @param[in,out] scanstate  Pointer to VCI Scan
 * @param[in,out] econtext   expression context required for execution
 * @param[in] max_slots      max length of this vector
 */
void
VciExecTargetListWithVectorProcessing(VciScanState *scanstate, ExprContext *econtext, int max_slots)
{
	for (int i = 0; i < scanstate->num_vp_targets; i++)
		VciExecEvalVectorProcessing(scanstate->vp_targets[i], econtext, max_slots);
}

/**
 * Evaluation function for Var when performing column store fetch
 *
 * NOTE(review): the @param list below looks stale — it documents the
 * old-style ExecEval signature (isNull/isDone), while the function takes
 * (ExprState *, ExprEvalStep *, ExprContext *) and writes through
 * op->resvalue / op->resnull; reconcile with the actual signature.
 *
 * @param[in,out] exprstate  expression state tree of Var (VciVarState type)
 * @param[in,out] econtext   expression context required for execution
 * @param[out] isNull  Return NULL/NOT NULL information of evaluation result of Var
 * @param[out] isDone  Return state when multiple lines are returned. Always ExprSingleResult in VciParamState.
+ * + * @return Return evaluation result data of Var + * + * @note Called from VciExecInitExpr() + */ +void +VciExecEvalScalarVarFromColumnStore(ExprState *exprstate, ExprEvalStep *op, ExprContext *econtext) +{ + vci_virtual_tuples_column_info_t *data_vector; + int index; + int null_bit_id; + + PlanState *parent; + VciScanState *scanstate; + int attnum; + + attnum = op->d.var.attnum; + parent = op->d.var.vci_parent_planstate; + scanstate = vci_search_scan_state((VciPlanState *) parent); + + /* The actual index number is the value minus 1. 0 is invalid. */ + index = scanstate->attr_map[attnum] - 1; + + Assert(index >= 0); + Assert(index < scanstate->vector_set->num_columns); + + data_vector = &scanstate->vector_set->column_info[index]; + + null_bit_id = data_vector->null_bit_id; + + if (null_bit_id >= 0) + *op->resnull = vci_CSGetIsNullOfVirtualTupleColumnar(scanstate->vector_set, index)[scanstate->pos.current_row]; + + *op->resvalue = vci_CSGetValuesOfVirtualTupleColumnar(scanstate->vector_set, index)[scanstate->pos.current_row]; + +} diff --git a/contrib/vci/executor/vci_gather.c b/contrib/vci/executor/vci_gather.c new file mode 100644 index 0000000..ffd98b8 --- /dev/null +++ b/contrib/vci/executor/vci_gather.c @@ -0,0 +1,157 @@ +/*------------------------------------------------------------------------- + * + * vci_gather.c + * Routines to handle VCI Gather nodes + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/executor/vci_gather.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "commands/explain.h" +#include "commands/explain_format.h" +#include "executor/nodeCustom.h" + +#include "vci.h" +#include "vci_executor.h" +#include "vci_utils.h" + +/* + * Declarations of Custom Plan Methods callbacks + */ +static void vci_gather_BeginCustomPlan(CustomScanState *node, EState *estate, int eflags); +static TupleTableSlot 
*vci_gather_ExecCustomPlan(CustomScanState *node);
static void vci_gather_EndCustomPlan(CustomScanState *node);
static void vci_gather_ReScanCustomPlan(CustomScanState *node);
static void vci_gather_MarkPosCustomPlan(CustomScanState *cpstate);
static void vci_gather_RestrPosCustomPlan(CustomScanState *cpstate);

static CustomScan *vci_gather_CopyCustomPlan(const CustomScan *_from);

/*
 * CustomScanMethods callback: allocate the executor-state node for a
 * VCI Gather plan and point it at the exec-method table.
 */
static Node *
vci_gather_CreateCustomScanState(CustomScan *cs)
{
	VciGather  *vgather;
	VciGatherState *vgs = palloc0_object(VciGatherState);

	vgather = (VciGather *) cs;

	vgs->vci.css.ss.ps.type = T_CustomScanState;
	vgs->vci.css.ss.ps.plan = (Plan *) vgather;
	vgs->vci.css.flags = cs->flags;
	vgs->vci.css.methods = &vci_gather_exec_methods;

	return (Node *) vgs;
}

/*
 * Initialize the Gather node: set up its expression context, initialize the
 * single outer child, and build a virtual result slot from the target list.
 */
static void
vci_gather_BeginCustomPlan(CustomScanState *node, EState *estate, int eflags)
{
	VciGather  *gather;
	VciGatherState *gatherstate;

	gather = (VciGather *) node->ss.ps.plan;

	/*
	 * create state structure
	 */
	gatherstate = (VciGatherState *) node;

	gatherstate->vci.css.ss.ps.state = estate;

	/* create expression context for node */
	ExecAssignExprContext(estate, &gatherstate->vci.css.ss.ps);

	outerPlanState(gatherstate) = ExecInitNode(outerPlan(gather), estate, eflags);

	ExecInitResultTupleSlotTL(&gatherstate->vci.css.ss.ps, &TTSOpsVirtual);
}

/*
 * Execution is pure pass-through: return whatever the outer child produces.
 */
static TupleTableSlot *
vci_gather_ExecCustomPlan(CustomScanState *cstate)
{
	VciGatherState *gatherstate = (VciGatherState *) cstate;

	return ExecProcNode(outerPlanState(gatherstate));
}

static void
vci_gather_EndCustomPlan(CustomScanState *node)
{
	VciGatherState *gatherstate = (VciGatherState *) node;

	/* clean out the tuple table */
	ExecClearTuple(gatherstate->vci.css.ss.ps.ps_ResultTupleSlot);

	ExecEndNode(outerPlanState(node));
}

static void
vci_gather_ReScanCustomPlan(CustomScanState *node)
{
	/*
	 * if chgParam of subnode is not null then plan will be re-scanned by
	 * first
ExecProcNode.
	 */
	if (node->ss.ps.lefttree->chgParam == NULL)
		ExecReScan(node->ss.ps.lefttree);
}

/*
 * Mark/restore are never requested for VCI Gather, so both stubs PANIC.
 * The LCOV exclusion covers both unreachable stubs (previously it started
 * between them, leaving MarkPos counted against coverage).
 */
/* LCOV_EXCL_START */

static void
vci_gather_MarkPosCustomPlan(CustomScanState *node)
{
	elog(PANIC, "VCI Gather does not support MarkPosCustomPlan call convention");
}

static void
vci_gather_RestrPosCustomPlan(CustomScanState *node)
{
	elog(PANIC, "VCI Gather does not support RestrPosCustomPlan call convention");
}

/* LCOV_EXCL_STOP */

/*
 * Plan-tree copy callback: duplicate the VciGather node, copying only the
 * VCI-common part via vci_copy_plan().
 */
static CustomScan *
vci_gather_CopyCustomPlan(const CustomScan *_from)
{
	const VciGather *from = (const VciGather *) _from;
	VciGather  *newnode;

	newnode = palloc0_object(VciGather);

	vci_copy_plan(&newnode->vci, &from->vci);

	((Node *) newnode)->type = nodeTag((Node *) from);

	return &newnode->vci.cscan;
}

CustomScanMethods vci_gather_scan_methods = {
	"VCI Gather",
	vci_gather_CreateCustomScanState,
	vci_gather_CopyCustomPlan,
};

CustomExecMethods vci_gather_exec_methods = {
	"VCI Gather",
	vci_gather_BeginCustomPlan,
	vci_gather_ExecCustomPlan,
	vci_gather_EndCustomPlan,
	vci_gather_ReScanCustomPlan,
	vci_gather_MarkPosCustomPlan,
	vci_gather_RestrPosCustomPlan,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL
};
diff --git a/contrib/vci/executor/vci_param.c b/contrib/vci/executor/vci_param.c
new file mode 100644
index 0000000..51cc468
--- /dev/null
+++ b/contrib/vci/executor/vci_param.c
@@ -0,0 +1,60 @@
/*-------------------------------------------------------------------------
 *
 * vci_param.c
 *	  Routines to handle VCI Param Expr node
 *
 * Param evaluation may execute ExecSetParamPlan() only the first time to execute the
 * subquery and receive and return the result, but parallel workers in parallel execution
 * may not be able to execute the subquery. To avoid this, the parallel worker asks
 * the main backend process to execute ExecSetParamPlan() on its behalf.
 *
 * Therefore, Param is converted to dedicated VciParamState.
 *
 * Portions Copyright (c) 2025, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  contrib/vci/executor/vci_param.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"
#include "executor/execExpr.h"
#include "executor/nodeSubplan.h"
#include "nodes/execnodes.h"
#include "nodes/primnodes.h"

#include "vci.h"
#include "vci_executor.h"

/**
 * PARAM_EXEC evaluation step for VCI.
 *
 * Looks up the executor-internal parameter identified by op->d.param.paramid,
 * triggering subplan evaluation via ExecSetParamPlan() if it has not been
 * computed yet, and stores the result through op->resvalue / op->resnull.
 *
 * @param[in] exprstate  enclosing expression state (unused here)
 * @param[in] op         current ExprEvalStep carrying the param id
 * @param[in] econtext   execution context holding ecxt_param_exec_vals
 */
void
VciExecEvalParamExec(ExprState *exprstate, ExprEvalStep *op, ExprContext *econtext)
{
	ParamExecData *prm;

	int			thisParamId = op->d.param.paramid;

	/*
	 * PARAM_EXEC params (internal executor parameters) are stored in the
	 * ecxt_param_exec_vals array, and can be accessed by array index.
	 */
	prm = &(econtext->ecxt_param_exec_vals[thisParamId]);

	if (prm->execPlan != NULL)
	{
		/* Parameter not evaluated yet, so go do it */
		ExecSetParamPlan(prm->execPlan, econtext);
		/* ExecSetParamPlan should have processed this param...
 */
		Assert(prm->execPlan == NULL);
	}

	*op->resnull = prm->isnull;
	*op->resvalue = prm->value;
}
diff --git a/contrib/vci/executor/vci_plan.c b/contrib/vci/executor/vci_plan.c
new file mode 100644
index 0000000..dcc7880
--- /dev/null
+++ b/contrib/vci/executor/vci_plan.c
@@ -0,0 +1,235 @@
/*-------------------------------------------------------------------------
 *
 * vci_plan.c
 *	  Common processing for VCI plan nodes
 *
 * Portions Copyright (c) 2025, PostgreSQL Global Development Group
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "executor/instrument.h"
#include "executor/nodeSubplan.h"
#include "nodes/bitmapset.h"
#include "nodes/execnodes.h"
#include "nodes/makefuncs.h"	/* for makeVarFromTargetEntry() */
#include "nodes/nodes.h"
#include "nodes/pg_list.h"
#include "nodes/plannodes.h"

#include "vci.h"
#include "vci_executor.h"

static VciScan *search_scan(Plan *node, AttrNumber scan_plan_no);
static VciScanState *search_scan_state(PlanState *node, Plan *target);

/**
 * Determine if given plan node is CustomPlan
 *
 * @param[in] plan plan node
 * @return true if CustomPlan, else false
 */
bool
vci_is_custom_plan(Plan *plan)
{
	NodeTag		type;

	type = nodeTag(plan);

	if ((type == T_CustomScan) || (type == T_CustomPlanMarkPos))
		return true;

	return false;
}

/**
 * Returns type of VCI plan node with VCI_CUSTOMPLAN_XXX macro
 *
 * @param[in] plan plan node
 * @retval 0 not VCI plan node
 * @retval non 0 is a VCI plan node
 */
int
vci_get_vci_plan_type(Plan *plan)
{
	if (plan == NULL)
		return 0;

	if (!vci_is_custom_plan(plan))
		return 0;

	/* The VCI node kind is encoded in the CustomScan flags word */
	return ((CustomScan *) plan)->flags & VCI_CUSTOMPLAN_MASK;
}

/**
 * Copy only the basic part of the VCI-derived plan node from src to dest.
 *
 * @param[out] dest Copy destination
 * @param[in] src Copy source
 */
void
vci_copy_plan(VciPlan *dest, const VciPlan *src)
{
	dest->scan_plan_no = src->scan_plan_no;

	/* Do not copy scan_cached */
	dest->scan_cached = NULL;
}

/**
 * Search and return VCI Scan node that is the source of data input for the VCI plan node
 *
 * @param[in] node Pointer to the VCI plan that serves as search starting point
 * @return Pointer to VCI Scan plan
 */
VciScan *
vci_search_scan(VciPlan *node)
{
	AttrNumber	scan_plan_no;
	VciScan    *result;

	/* Fast path: result memoized by a previous search */
	if (node->scan_cached)
		return node->scan_cached;

	scan_plan_no = node->scan_plan_no;
	if (scan_plan_no == 0)
		return NULL;

	result = search_scan(&node->cscan.scan.plan, scan_plan_no);

	/*
	 * NOTE(review): this guard is always true here — scan_cached non-NULL
	 * already returned above and nothing modified it since.
	 */
	if (node->scan_cached == NULL)
		node->scan_cached = result;

	return result;
}

/**
 * Subroutine for vci_search_scan()
 *
 * Recursively descend and search for VCI Scan nodes.
 */
static VciScan *
search_scan(Plan *node, AttrNumber scan_plan_no)
{
	/* Match is by plan number, not node identity */
	if (node->plan_no == scan_plan_no)
		return (VciScan *) node;

	if (outerPlan(node))
	{
		VciScan    *result = search_scan(outerPlan(node), scan_plan_no);

		if (result != NULL)
			return result;
	}

	if (innerPlan(node))
	{
		VciScan    *result = search_scan(innerPlan(node), scan_plan_no);

		if (result != NULL)
			return result;
	}

	/*
	 * Some types of plan nodes have plans other than outerPlan and innerPlan,
	 * but they do not contain VCI Scan nodes.
	 */

	return NULL;
}

/**
 * Search and return VCI Scan State node that is the source of data input for the VCI plan state node
 *
 * @param[in] node Pointer to the VCI plan state node that serves as search starting point
 * @return Pointer to VCI Scan plan state node
 */
VciScanState *
vci_search_scan_state(VciPlanState *node)
{
	VciScan    *scan;
	VciScanState *result;

	/* Fast path: result memoized by a previous search */
	if (node->scanstate_cached)
		return node->scanstate_cached;

	scan = vci_search_scan((VciPlan *) node->css.ss.ps.plan);
	if (scan == NULL)
		return NULL;

	result = search_scan_state(&node->css.ss.ps, &scan->vci.cscan.scan.plan);

	/*
	 * NOTE(review): this guard is always true here — scanstate_cached
	 * non-NULL already returned above.
	 */
	if (node->scanstate_cached == NULL)
		node->scanstate_cached = result;

	return result;
}

/**
 * Subroutine for vci_search_scan_state()
 *
 * Recursively descend and search for VCI Scan state nodes.
 */
static VciScanState *
search_scan_state(PlanState *node, Plan *target)
{
	/* Match the state node by pointer identity of its Plan */
	if (node->plan == target)
	{
		Assert(node->type == T_CustomScanState);
		return (VciScanState *) node;
	}

	if (outerPlanState(node))
	{
		VciScanState *result = search_scan_state(outerPlanState(node), target);

		if (result != NULL)
			return result;
	}

	if (innerPlanState(node))
	{
		VciScanState *result = search_scan_state(innerPlanState(node), target);

		if (result != NULL)
			return result;
	}

	/*
	 * Depending on the type of Plan State, some may have Plan States other
	 * than outerPlanState and innerPlanState, but they do not have VCI Scan
	 * State.
	 */

	return NULL;
}

/**
 * Create a target list that pass through the lower nodes required for
 * Materialize node.
 *
 * @param[in] targetlist target list
 * @return created pass through target list
 */
List *
vci_generate_pass_through_target_list(List *targetlist)
{
	List	   *new_targetlist = NIL;
	ListCell   *lc;

	foreach(lc, targetlist)
	{
		TargetEntry *src_tle = (TargetEntry *) lfirst(lc);
		TargetEntry *new_tle;

		new_tle = makeNode(TargetEntry);

		/*
		 * Shallow-copy the entry, then replace its expression with a Var
		 * referencing the same column of the outer (lower) node's output,
		 * so the value is simply passed through.
		 */
		*new_tle = *src_tle;
		new_tle->expr = (Expr *) makeVarFromTargetEntry(OUTER_VAR, src_tle);

		new_targetlist = lappend(new_targetlist, new_tle);
	}

	return new_targetlist;
}
diff --git a/contrib/vci/executor/vci_plan_func.c b/contrib/vci/executor/vci_plan_func.c
new file mode 100644
index 0000000..c817320
--- /dev/null
+++ b/contrib/vci/executor/vci_plan_func.c
@@ -0,0 +1,942 @@
/*-------------------------------------------------------------------------
 *
 * vci_plan_func.c
 *	  General-purpose manipulations of plan trees
 *
 *
 * Portions Copyright (c) 2025, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  contrib/vci/executor/vci_plan_func.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "executor/executor.h"
#include "nodes/bitmapset.h"
#include "nodes/nodeFuncs.h"
#include "nodes/pg_list.h"
#include "nodes/plannodes.h"

#include "vci.h"
#include "vci_executor.h"

static bool expression_walker_core(Plan *plan, bool (*walker) (Node *, void *), bool (*walker_initplan) (Node *, void *), void (*attr_cb) (AttrNumber *, void *), void *context);
static bool subplan_mutator(PlannedStmt *plannedstmt, Plan **plan_p, int plan_id, vci_mutator_t mutator, vci_topmost_plan_cb_t topmostplan, void *context, int eflags, bool *changed);
static bool plan_tree_mutator(Plan **edge, Plan *plan, vci_mutator_t mutator, void *context, int eflags, bool *changed);
static bool plan_list_tree_mutator(List **plan_list, Plan *plan, vci_mutator_t mutator, void *context, int eflags, bool *changed);

/*---------------------------------------------------------------------------*/
/* Plan walker                                                               */
/*---------------------------------------------------------------------------*/

/**
 * Helper function that traverse plan tree without updating it
 *
 * @param[in] plannedstmt Pointer to PlannedStmt type struct that holds the plan tree to be traversed
 * @param[in] walker Callback function to be used in traverse. Returns true to stop cycle.
 * @param[in] topmostplan Callback function to call before analyzing Topmost plan node
 * @param[in,out] context Pointer to arbitrary data to pass to callback function
 *
 * @return true when callback function stop cycle, false if cycle is complete
 */
bool
vci_plannedstmt_tree_walker(PlannedStmt *plannedstmt, bool (*walker) (Plan *, void *), vci_topmost_plan_cb_t topmostplan, void *context)
{
	int			i;
	ListCell   *l;

	if (plannedstmt == NULL)
		return false;

	/* Subplan ids are 1-based; 0 is reserved for the main plan tree */
	i = 1;
	foreach(l, plannedstmt->subplans)
	{
		Plan	   *subplan = (Plan *) lfirst(l);

		if (subplan == NULL)
			continue;

		if (topmostplan)
			topmostplan(subplan, i /* plan_id */ , context);

		if (walker(subplan, context))
			return true;

		i++;
	}

	if (plannedstmt->planTree)
	{
		if (topmostplan)
			topmostplan(plannedstmt->planTree, 0 /* plan_id */ , context);

		if (walker(plannedstmt->planTree, context))
			return true;
	}

	return false;
}

/**
 * Helper function that traverse plan node without updating it
 *
 * @param[in] plan Pointer to Plan type struct that holds the plan node to be traversed
 * @param[in] walker Callback function to be used in traverse. Returns true to stop cycle.
 * @param[in,out] context Pointer to arbitrary data to pass to callback function
 *
 * @return true when callback function stop cycle, false if cycle is complete
 */
bool
vci_plan_tree_walker(Plan *plan, bool (*walker) (Plan *, void *), void *context)
{
	ListCell   *lc;

	switch (nodeTag(plan))
	{
			/* Node types VCI cannot handle: abort the traversal */
		case T_ForeignScan:
		case T_ModifyTable:
		case T_LockRows:
			elog(DEBUG1, "unsupported node type: %s(%d)",
				 VciGetNodeName(nodeTag(plan)), (int) nodeTag(plan));
			return true;

			/* Node types whose children live in a list, not outer/inner */
		case T_Append:
			foreach(lc, ((Append *) plan)->appendplans)
			{
				if (walker((Plan *) lfirst(lc), context))
					return true;
			}
			break;

		case T_MergeAppend:
			foreach(lc, ((MergeAppend *) plan)->mergeplans)
			{
				if (walker((Plan *) lfirst(lc), context))
					return true;
			}
			break;

		case T_BitmapAnd:
			foreach(lc, ((BitmapAnd *) plan)->bitmapplans)
			{
				if (walker((Plan *) lfirst(lc), context))
					return true;
			}
			break;

		case T_BitmapOr:
			foreach(lc, ((BitmapOr *) plan)->bitmapplans)
			{
				if (walker((Plan *) lfirst(lc), context))
					return true;
			}
			break;

		case T_SubqueryScan:
			if (((SubqueryScan *) plan)->subplan)
				if (walker(((SubqueryScan *) plan)->subplan, context))
					return true;
			break;

		default:
			break;
	}

	/* Common outer/inner children for all node types */
	if (outerPlan(plan))
		if (walker(outerPlan(plan), context))
			return true;

	if (innerPlan(plan))
		if (walker(innerPlan(plan), context))
			return true;

	return false;
}

/**
 * Helper function that traverse expression tree in plan node without updating it
 *
 * @param[in] plan Pointer to Plan type struct that holds the plan node to be traversed
 * @param[in] walker Callback function to be used in traverse. Returns true to stop cycle.
 * @param[in,out] context Pointer to arbitrary data to pass to callback function
 *
 * @return true when callback function stop cycle, false if cycle is complete
 */
bool
vci_expression_walker(Plan *plan, bool (*walker) (Node *, void *), void *context)
{
	/* No initPlan walker, no attribute callback */
	return expression_walker_core(plan, walker, NULL, NULL, context);
}

/**
 * Helper function that traverse expression tree in plan node without updating it
 * If there is attribute information (AttrNumber) other than Var node included in plan node,
 * attr_cb is executed.
 *
 * @param[in] plan Pointer to Plan type struct that holds the plan node to be traversed
 * @param[in] walker Callback function to be used in traverse. Returns true to stop cycle.
 * @param[in] attr_cb Callback function to be called when attribute (column) other than Var exists
 * @param[in,out] context Pointer to arbitrary data to pass to callback function
 *
 * @return true when callback function stop cycle, false if cycle is complete
 */
bool
vci_expression_and_colid_walker(Plan *plan, bool (*walker) (Node *, void *), void (*attr_cb) (AttrNumber *, void *), void *context)
{
	/* walker doubles as the initPlan walker here */
	return expression_walker_core(plan, walker, walker, attr_cb, context);
}

/**
 * Helper function that traverse expression tree in plan node without updating it
 * Run walker_initplan if there is an initPlan associated with the plan node.
 *
 * @param[in] plan Pointer to Plan type struct that holds the plan node to be traversed
 * @param[in] walker Callback function to be used in traverse. Returns true to stop cycle.
 * @param[in] walker_initplan Callback function to be used in initPlan traverse
 * @param[in,out] context Pointer to arbitrary data to pass to callback function
 *
 * @return true when callback function stop cycle, false if cycle is complete
 */
bool
vci_expression_and_initplan_walker(Plan *plan, bool (*walker) (Node *, void *), bool (*walker_initplan) (Node *, void *), void *context)
{
	return expression_walker_core(plan, walker, walker_initplan, NULL, context);
}

/*
 * Core of the expression walkers above.
 *
 * Visits every expression hung off a single plan node (node-type-specific
 * fields plus the common qual and targetlist), optionally visiting initPlans
 * (walker_initplan) and reporting non-Var column references (attr_cb).
 * Returns true as soon as any walker callback returns true.
 */
static bool
expression_walker_core(Plan *plan, bool (*walker) (Node *, void *), bool (*walker_initplan) (Node *, void *), void (*attr_cb) (AttrNumber *, void *), void *context)
{
	if (walker_initplan)
	{
		if (expression_tree_walker((Node *) plan->initPlan, walker_initplan, context))
			return true;
	}

	/* Node-type-specific expression fields */
	switch (nodeTag(plan))
	{
		case T_Result:
			{
				Result	   *result = (Result *) plan;

				if (expression_tree_walker((Node *) result->resconstantqual, walker, context))
					return true;
			}
			break;

		case T_MergeAppend:
			if (attr_cb)
			{
				MergeAppend *merge_append = (MergeAppend *) plan;

				for (int i = 0; i < merge_append->numCols; i++)
					attr_cb(&merge_append->sortColIdx[i], context);

			}
			break;

		case T_RecursiveUnion:
			if (attr_cb)
			{
				RecursiveUnion *recursive_union = (RecursiveUnion *) plan;

				for (int i = 0; i < recursive_union->numCols; i++)
					attr_cb(&recursive_union->dupColIdx[i], context);
			}
			break;

		case T_IndexScan:
			{
				IndexScan  *index_scan = (IndexScan *) plan;

				if (expression_tree_walker((Node *) index_scan->indexqual, walker, context))
					return true;

				if (expression_tree_walker((Node *) index_scan->indexqualorig, walker, context))
					return true;

				if (expression_tree_walker((Node *) index_scan->indexorderby, walker, context))
					return true;

				if (expression_tree_walker((Node *) index_scan->indexorderbyorig, walker, context))
					return true;
			}
			break;

		case T_IndexOnlyScan:
			{
				IndexOnlyScan *index_only_scan = (IndexOnlyScan *) plan;

				if (expression_tree_walker((Node *) index_only_scan->indexqual, walker, context))
					return true;

				if (expression_tree_walker((Node *) index_only_scan->indexorderby, walker, context))
					return true;

				if (expression_tree_walker((Node *) index_only_scan->indextlist, walker, context))
					return true;
			}
			break;

		case T_BitmapIndexScan:
			{
				BitmapIndexScan *bitmap_index_scan = (BitmapIndexScan *) plan;

				if (expression_tree_walker((Node *) bitmap_index_scan->indexqual, walker, context))
					return true;

				if (expression_tree_walker((Node *) bitmap_index_scan->indexqualorig, walker, context))
					return true;
			}
			break;

		case T_BitmapHeapScan:
			{
				BitmapHeapScan *bitmap_heap_scan = (BitmapHeapScan *) plan;

				if (expression_tree_walker((Node *) bitmap_heap_scan->bitmapqualorig, walker, context))
					return true;
			}
			break;

		case T_TidScan:
			{
				TidScan    *tid_scan = (TidScan *) plan;

				if (expression_tree_walker((Node *) tid_scan->tidquals, walker, context))
					return true;
			}
			break;

		case T_TidRangeScan:
			{
				TidRangeScan *tid_range_scan = (TidRangeScan *) plan;

				if (expression_tree_walker((Node *) tid_range_scan->tidrangequals, walker, context))
					return true;
			}
			break;

		case T_FunctionScan:
			{
				FunctionScan *func_scan = (FunctionScan *) plan;

				if (expression_tree_walker((Node *) func_scan->functions, walker, context))
					return true;
			}
			break;

		case T_ValuesScan:
			{
				ValuesScan *values_scan = (ValuesScan *) plan;

				if (expression_tree_walker((Node *) values_scan->values_lists, walker, context))
					return true;
			}
			break;

		case T_CteScan:
			break;

		case T_WorkTableScan:
			break;

		case T_NestLoop:
			{
				NestLoop   *nest_loop = (NestLoop *) plan;
				ListCell   *lc;

				if (expression_tree_walker((Node *) nest_loop->join.joinqual, walker, context))
					return true;

				/* nestParams carry Vars passed down to the inner side */
				foreach(lc, nest_loop->nestParams)
				{
					NestLoopParam *nlp = (NestLoopParam *) lfirst(lc);

					if (walker((Node *) nlp->paramval, context))
						return true;
				}
			}
			break;

		case T_Memoize:
			{
				Memoize    *memoize = (Memoize *) plan;

				if (expression_tree_walker((Node *) memoize->param_exprs, walker, context))
					return true;
			}
			break;

		case T_MergeJoin:
			{
				MergeJoin  *merge_join = (MergeJoin *) plan;

				if (expression_tree_walker((Node *) merge_join->join.joinqual, walker, context))
					return true;

				if (expression_tree_walker((Node *) merge_join->mergeclauses, walker, context))
					return true;
			}
			break;

		case T_HashJoin:
			{
				HashJoin   *hash_join = (HashJoin *) plan;

				if (expression_tree_walker((Node *) hash_join->join.joinqual, walker, context))
					return true;

				if (expression_tree_walker((Node *) hash_join->hashclauses, walker, context))
					return true;

				if (expression_tree_walker((Node *) hash_join->hashkeys, walker, context))
					return true;
			}
			break;

			/*
			 * The following node types keep column references as raw
			 * AttrNumber arrays rather than Var nodes; report them via
			 * attr_cb when the caller asked for that.
			 */
		case T_Sort:
			if (attr_cb)
			{
				Sort	   *sort = (Sort *) plan;

				for (int i = 0; i < sort->numCols; i++)
					attr_cb(&sort->sortColIdx[i], context);
			}
			break;

		case T_Group:
			if (attr_cb)
			{
				Group	   *group = (Group *) plan;

				for (int i = 0; i < group->numCols; i++)
					attr_cb(&group->grpColIdx[i], context);
			}
			break;

		case T_Agg:
			if (attr_cb)
			{
				Agg		   *agg = (Agg *) plan;

				for (int i = 0; i < agg->numCols; i++)
					attr_cb(&agg->grpColIdx[i], context);
			}
			break;

		case T_WindowAgg:
			if (attr_cb)
			{
				WindowAgg  *window_agg = (WindowAgg *) plan;

				for (int i = 0; i < window_agg->partNumCols; i++)
					attr_cb(&window_agg->partColIdx[i], context);

				for (int i = 0; i < window_agg->ordNumCols; i++)
					attr_cb(&window_agg->ordColIdx[i], context);
			}
			break;

		case T_Unique:
			if (attr_cb)
			{
				Unique	   *unique = (Unique *) plan;

				for (int i = 0; i < unique->numCols; i++)
					attr_cb(&unique->uniqColIdx[i], context);
			}
			break;

		case T_Hash:
			break;

		case T_SetOp:
			if (attr_cb)
			{
				SetOp	   *setop = (SetOp *) plan;

				for (int i = 0; i < setop->numCols; i++)
					attr_cb(&setop->cmpColIdx[i], context);
			}
			break;

		case T_Limit:
			{
				Limit	   *limit = (Limit *) plan;

				if (expression_tree_walker((Node *) limit->limitOffset, walker, context))
					return true;

				if (expression_tree_walker((Node *) limit->limitCount, walker, context))
					return true;
			}
			break;

		case T_CustomScan:
		case T_CustomPlanMarkPos:
			/*
			 * NOTE(review): this inner switch is currently a no-op for all
			 * VCI node kinds — presumably a placeholder for node-specific
			 * expression fields; confirm whether VCI nodes carry expressions
			 * that should be walked here.
			 */
			switch (vci_get_vci_plan_type(plan))
			{
				case VCI_CUSTOMPLAN_SCAN:
				case VCI_CUSTOMPLAN_SORT:
				case VCI_CUSTOMPLAN_AGG:
				case VCI_CUSTOMPLAN_GATHER:
					break;

				default:
					break;
			}
			break;

		case T_ForeignScan:
		case T_ModifyTable:
		case T_LockRows:
			elog(DEBUG1, "unsupported node type: %s(%d)",
				 VciGetNodeName(nodeTag(plan)), (int) nodeTag(plan));
			return true;

		default:
			break;
	}

	/* Fields common to every plan node */
	if (expression_tree_walker((Node *) plan->qual, walker, context))
		return true;

	if (expression_tree_walker((Node *) plan->targetlist, walker, context))
		return true;

	/* Success */
	return false;
}

/*---------------------------------------------------------------------------*/
/* Plan mutator                                                              */
/*---------------------------------------------------------------------------*/

/**
 * Rewrite each plan node in PlannedStmt according to conditions from mutator
 *
 * @param[in,out] plannedstmt Pointer to PlannedStmt type struct containing plan tree to be rewritten
 * @param[in] mutator Callback function to be used in rewrite
 * @param[in] topmostplan Callback function to be called before parsing Topmost plan
 * @param[in,out] context Pointer to arbitrary data to pass to callback function
 * @param[in] eflags Specify same eflags value as the one passed when plan tree starts ExecutorStart()
 * @param[out] changed Write true to *changed if any rewrite is done. Do nothing if not.
+ * @return true when the callback function stops the cycle, false if the cycle completes
+ *
+ * @note the rewrite is executed in place
+ * @param[in] subplan_order Array of plan IDs in the order the subplans are to be parsed (including the main plan)
+ *
+ * @return true when the callback function stops the cycle, false if the cycle completes
+ *
+ * @note the rewrite is executed in place
+ Plan *oldplan; + Plan *newplan; + + /* + * A subplan will never need to do BACKWARD scan nor MARK/RESTORE. If it + * is a parameterless subplan (not initplan), we suggest that it be + * prepared to handle REWIND efficiently; otherwise there is no need. + */ + sp_eflags = eflags + & (EXEC_FLAG_EXPLAIN_ONLY | EXEC_FLAG_WITH_NO_DATA); + if (bms_is_member(plan_id, plannedstmt->rewindPlanIDs)) + sp_eflags |= EXEC_FLAG_REWIND; + + oldplan = newplan = *plan_p; + + if (topmostplan) + topmostplan(oldplan, plan_id, context); + + if (mutator(&newplan, NULL, context, sp_eflags, changed)) + return true; + + if (newplan != oldplan) + { + *plan_p = (void *) newplan; + *changed = true; + } + + if (topmostplan) + topmostplan(newplan, plan_id, context); + + return false; +} + +/** + * Rewrite nodes under plan + * (Do not rewrite plan itself) + * + * @param[in,out] plan_p Pointer to a pointer to Plan typepe struct that holds the plan node to be rewritten + * @param[in] parent Parent plan node of plan node to rewrite. NULL is there is no parent. + * @param[in] mutator Callback function to be used in rewrite + * @param[in,out] context Pointer to arbitrary data to pass to callback function + * @param[in] eflags eflags value same as the one passed by plan tree in ExecutorStart() + * @param[out] changed Write true to *changed if any rewrite is done. Do nothing if not. 
+ * + * @return true when callback function stops cycle, false if cycle is complete + */ +bool +vci_plan_tree_mutator(Plan **plan_p, Plan *parent, vci_mutator_t mutator, void *context, int eflags, bool *changed) +{ + int eflags_outer, + eflags_inner; + Plan *plan; + + plan = *plan_p; + + if (plan == NULL) + return false; + + /* + * Determine unsupported plan nodes + */ + switch (nodeTag(plan)) + { + case T_ForeignScan: + case T_ModifyTable: + case T_LockRows: + elog(DEBUG1, "unsupported node type: %s(%d)", + VciGetNodeName(nodeTag(plan)), (int) nodeTag(plan)); + return true; + case T_Agg: + { + if ((parent != NULL) && (nodeTag(parent) == T_Gather || nodeTag(parent) == T_GatherMerge)) + return true; /* If underlying plan is Aggregate then it + * skip using VCI as OSS parallel + * aggregation is performing better */ + } + break; + + case T_Gather: + case T_GatherMerge: + + /* + * For parallel aggregates, there will be two aggregate nodes: + * partial and final. The Gather node could be in between these + * two nodes with a Sort in between. So check if the either the + * parent or the child of an Aggregate is a Gather node. 
for eg: + * Finalize Aggregate->Gather->Sort->Partial Aggregate + */ + if ((parent != NULL) && (nodeTag(parent) == T_Agg)) + return true; + default: + break; + } + + eflags_outer = eflags_inner = eflags; + + switch (nodeTag(plan)) + { + case T_Material: + case T_Sort: + eflags_outer = eflags_inner = (eflags & ~(EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)); + break; + + case T_CteScan: + eflags_outer = eflags_inner = (eflags | EXEC_FLAG_REWIND); + break; + + case T_MergeJoin: + eflags_inner = eflags | EXEC_FLAG_MARK; + break; + + case T_NestLoop: + if (((NestLoop *) plan)->nestParams == NIL) + eflags_inner = (eflags | EXEC_FLAG_REWIND); + else + eflags_inner = (eflags & ~EXEC_FLAG_REWIND); + break; + + case T_SetOp: + if (((SetOp *) plan)->strategy == SETOP_HASHED) + eflags_outer &= ~EXEC_FLAG_REWIND; + break; + + default: + break; + } + + if (plan_tree_mutator(&plan->lefttree, plan, mutator, context, eflags_outer, changed)) + return true; + + if (plan_tree_mutator(&plan->righttree, plan, mutator, context, eflags_inner, changed)) + return true; + + /* + * Process nodes other than lefttree and rightree connected to this plan + * node + */ + switch (nodeTag(plan)) + { + case T_Append: + { + Append *node = (Append *) plan; + + if (plan_list_tree_mutator(&node->appendplans, plan, mutator, context, eflags_outer, changed)) + return true; + } + break; + + case T_MergeAppend: + { + MergeAppend *node = (MergeAppend *) plan; + + if (plan_list_tree_mutator(&node->mergeplans, plan, mutator, context, eflags_outer, changed)) + return true; + } + break; + + case T_BitmapAnd: + { + BitmapAnd *node = (BitmapAnd *) plan; + + if (plan_list_tree_mutator(&node->bitmapplans, plan, mutator, context, eflags_outer, changed)) + return true; + } + break; + + case T_BitmapOr: + { + BitmapOr *node = (BitmapOr *) plan; + + if (plan_list_tree_mutator(&node->bitmapplans, plan, mutator, context, eflags_outer, changed)) + return true; + } + break; + + case T_SubqueryScan: + { + 
SubqueryScan *node = (SubqueryScan *) plan; + + if (plan_tree_mutator(&node->subplan, plan, mutator, context, eflags_outer, changed)) + return true; + } + break; + + default: + break; + } + + return false; +} + +/** + * Rewrite edge connected to plan node of interest (*plan_p) + * + * @param[in,out] plan_p Pointer to a pointer to Plan typepe struct that holds the plan node to be rewritten + * @param[in] parent Parent plan node of plan node to rewrite. NULL is there is no parent. + * @param[in] mutator Callback function to be used in rewrite + * @param[in,out] context Pointer to arbitrary data to pass to callback function + * @param[in] eflags eflags value same as the one passed by plan tree in ExecutorStart() + * + * @param[out] changed Write true to *changed if any rewrite is done. Do nothing if not. + */ +static bool +plan_tree_mutator(Plan **plan_p, Plan *parent, vci_mutator_t mutator, void *context, int eflags, bool *changed) +{ + if (*plan_p == NULL) + return false; + + if (mutator(plan_p, parent, context, eflags, changed)) + return true; + + return false; +} + +/** + * Rewrite plan node list + * + * @param[in,out] plan_list Pointer to List type struct that holds list of plan node to be rewritten + * @param[in] parent Parent plan node of plan node list to be rewritten. NULL if no parent. + * @param[in] mutator Callback function to be used in rewrite + * @param[in,out] context Pointer to arbitrary data to pass to callback function + * @param[in] eflags eflags value same as the one passed by plan tree in ExecutorStart() + * @param[out] changed Write true to *changed if any rewrite is done. Do nothing if not. 
+ */ +static bool +plan_list_tree_mutator(List **plan_list, Plan *parent, vci_mutator_t mutator, void *context, int eflags, bool *changed) +{ + List *newlist = NIL; + List *list = *plan_list; + ListCell *lc; + bool any_changed = false; + + if (list == NIL) + return false; + + if (list_length(list) == 0) + return false; + + foreach(lc, list) + { + Plan *child = (Plan *) lfirst(lc); + + /* + * In case of List of plans, we need to verify any of the list item + * has Gather node in top-level plan.i.e., + * Appenedplans->Gather->Parallel Seq scan. If yes, plan tree walker + * cannot replace the gather node properly. So, skip re-writing VCI + * plan in such scenarios. + */ + if (newlist == NIL) /* Using this just to make this code check + * work only for the first time which is what + * needed */ + { + if (nodeTag(child) == T_Gather || nodeTag(child) == T_GatherMerge) + return true; + } + + if (plan_tree_mutator(&child, parent, mutator, context, eflags, &any_changed)) + return true; + + newlist = lappend(newlist, child); + } + + if (any_changed) + { + *plan_list = newlist; + *changed = true; + } + else + { + list_free(newlist); + } + + return false; +} diff --git a/contrib/vci/executor/vci_planner.c b/contrib/vci/executor/vci_planner.c new file mode 100644 index 0000000..11050ff --- /dev/null +++ b/contrib/vci/executor/vci_planner.c @@ -0,0 +1,1911 @@ +/*------------------------------------------------------------------------- + * + * vci_planner.c + * Plan rewrite routine(sequential only) + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/executor/vci_planner.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/sysattr.h" +#include "access/transam.h" +#include "catalog/pg_aggregate.h" +#include "catalog/pg_am.h" +#include "executor/executor.h" +#include "executor/nodeCustom.h" +#include 
"executor/nodeIndexscan.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "optimizer/optimizer.h" +#include "optimizer/cost.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/plancat.h" +#include "optimizer/planmain.h" +#include "optimizer/planner.h" +#include "optimizer/restrictinfo.h" +#include "parser/parsetree.h" +#include "utils/fmgroids.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/relcache.h" +#include "utils/snapmgr.h" +#include "utils/snapshot.h" +#include "utils/syscache.h" + +#include "vci.h" +#include "vci_columns_data.h" + +#include "vci_mem.h" +#include "vci_executor.h" +#include "vci_utils.h" +#include "vci_planner.h" +#include "vci_supported_oid.h" + +/* + * rt_fetch + * + * NB: this will crash and burn if handed an out-of-range RT index + */ +#define rt_fetch(rangetable_index, rangetable) \ + ((RangeTblEntry *) list_nth(rangetable, (rangetable_index)-1)) + +/* + * getrelid + * + * Given the range index of a relation, return the corresponding + * relation OID. Note that InvalidOid will be returned if the + * RTE is for a non-relation-type RTE. + */ +#define getrelid(rangeindex,rangetable) \ + (rt_fetch(rangeindex, rangetable)->relid) + +/** + * Used to pass auxiliary information about the table to vci_can_rewrite_custom_scan(). + */ +typedef struct +{ + /** reloid of table to be selected for rewrite */ + Oid reloid; + + /** oid of selected VCI index. InvalidOid if not rewriteable. */ + Oid indexOid; + + /** Copy reltuples of selected table */ + double estimate_tuples; + + /** Bitmap of referenced column (attribute). NULL if not rewriteable. */ + Bitmapset *attrs_used; +} vci_table_info_t; + +/** + * Used to search plan tree with vci_gather_used_attrs() and vci_gather_one_used_attr(), + * and record attributes references in tables specified by scanrelid. 
+ */ +typedef struct +{ + Index scanrelid; + + Bitmapset *attrs_used; +} vci_gather_used_attrs_t; + +/** + * Search plan tree with vci_renumber_attrs() and vci_renumber_on_attr(), and rewrite attribute number. + * Used to replace varattno in Var. + */ +typedef struct +{ + Index scanrelid; + + /** New attribute number map. newattno = attr_map[oldattno] */ + AttrNumber *attr_map; +} vci_renumber_attrs_t; + +typedef struct +{ + Plan *father_plan; + Plan *gather_plan; +} father_gather_plans; + +static bool vci_optimize_phase1(PlannedStmt *plannedstmt, vci_rewrite_plan_context_t *rp_context, int eflags); +static bool vci_rewrite_plan_tree_mutator(Plan **plan_p, Plan *parent, void *context, int eflags, bool *changed); +static bool vci_rewrite_plan_node(Plan **plan_p, Plan *parent, void *context, int eflags, bool *changed); +static bool vci_rewrite_scan_node_via_column_store(Plan **plan_p, Plan *parent, void *context, bool *changed); +static bool vci_insert_material_node_mutator(Plan **plan_p, Plan *parent, void *context, int eflags, bool *changed); +static VciSort *vci_create_custom_sort(Sort *sortnode, AttrNumber scan_plan_no); +static VciAgg *vci_create_custom_agg(Agg *aggnode, AttrNumber scan_plan_no, bool suppress_vp); +static List *vci_reconstruct_qualification(Scan *scannode); +static bool vci_can_rewrite_custom_scan(Scan *scannode, List *targetlist, List *qual, Plan *parent, vci_table_info_t *result); +static Bitmapset *vci_gather_used_attrs_in_plan(Plan *plan, Index scanrelid); +static bool vci_gather_used_attrs(Node *node, void *context); +static void vci_gather_one_used_attr(AttrNumber *attr_p, void *context); +static void vci_minimize_tlist_of_scan(Scan *scannode, Plan *parent, Index parent_refer_relid, Bitmapset *attrs_used_from_parent); +static VciScan *vci_create_custom_scan_via_column_store(Scan *scannode, const vci_table_info_t *table_info, List *tlist, List *qual, bool suppress_vp); + +static bool vci_contain_inapplicable_expr_walker(Node *node, void 
*context); +static bool vci_contain_nestloop_param_expr_walker(Node *node, void *context); +static bool vci_renumber_attrs(Node *node, void *context); +static void vci_renumber_one_attr(AttrNumber *attr_p, void *context); +static bool vci_tlist_consists_of_only_simple_vars(List *tlist, Index scanrelid); + +static AttrNumber vci_satisfies_vci_join(vci_rewrite_plan_context_t *rp_context, Join *join); + +static bool vci_is_supported_operation(Oid oid); +static bool vci_is_not_user_defined_type(Oid oid); + +static void vci_update_plan_tree(PlannedStmt *plannedstmt); +static List *vci_update_target_list(Plan *plan, Plan *gather_plan); + +static bool vci_update_plan_walker(Plan *plan, void *plans); + +/** + * Attempt to rewrite plan and return the rewritten planned stmt is successful. + * + * @param[in] src original planned stmt + * @param[in] eflags flag to be passed to ExecInitNode + * @param[in] snapshot snapshot + * + * @retval non NULL plan after rewrite + * @retval NULL rewrite failed + */ +PlannedStmt * +vci_generate_custom_plan(PlannedStmt *src, int eflags, Snapshot snapshot) +{ + int nParamExec; + bool changed, + dummy; + bool isGather = false; + PlannedStmt *target; + vci_rewrite_plan_context_t rp_context; + + vci_register_applicable_udf(snapshot); + + target = copyObjectImpl(src); + + /* + * Initialize plan rewrite information + */ + memset(&rp_context, 0, sizeof(rp_context)); + + rp_context.plannedstmt = target; + rp_context.max_subplan_attrs = list_length(target->subplans) + 1; + rp_context.subplan_attr_map = palloc0_array(vci_subplan_attr_t, rp_context.max_subplan_attrs); + rp_context.subplan_order_array = palloc_array(int, rp_context.max_subplan_attrs); + rp_context.max_plan_attrs = 16; + rp_context.plan_attr_map = palloc0_array(vci_plan_attr_t, rp_context.max_plan_attrs); + rp_context.last_plan_no = 0; + nParamExec = list_length(target->paramExecTypes); + rp_context.param_exec_attr_map = palloc0_array(vci_param_exec_attr_t, nParamExec); + + for (int i = 
0; i < rp_context.max_subplan_attrs; i++) + rp_context.subplan_order_array[i] = i; + + /* + * Preparing for analysis + */ + if (vci_preanalyze_plan_tree(target, &rp_context, eflags, &isGather)) + { + elog(DEBUG1, "Not suitable plan"); + return NULL; + } + + /* Adjust plan tree by moving oss gather plan */ + if (isGather) + vci_update_plan_tree(target); + + /* + * Phase 1: Basic VCI plan rewrite + */ + changed = vci_optimize_phase1(target, &rp_context, eflags); + + if (!changed) + { + elog(DEBUG1, "No plan to be rewritten"); + return NULL; + } + + /* + * VCI plan node do not support backward scan an mark/restore, so insert + * Material node if eflag needs them. + */ + vci_plannedstmt_tree_mutator(target, vci_insert_material_node_mutator, vci_register_plan_id, &rp_context, eflags, &dummy); + + /* Disable community parallelism */ + /* target->parallelModeNeeded=0; */ + + elog(DEBUG1, "Rewrite plan tree"); + + return target; +} + +/*==========================================================================*/ +/* Plan rewrite */ +/*==========================================================================*/ + +/** + * Basic part of VCI plan rewrite + * + * @param[in] plannedstmt plan + * @param[in,out] rp_context Plan rewrite information + * @param[in] eflags flag to be passed to ExecInitNode + * + * @return true if rewrite succeed, false if failed + */ +static bool +vci_optimize_phase1(PlannedStmt *plannedstmt, vci_rewrite_plan_context_t *rp_context, int eflags) +{ + bool changed = false; + + rp_context->forbid_parallel_exec = false; + + if (vci_plannedstmt_tree_mutator_order(plannedstmt, vci_rewrite_plan_tree_mutator, vci_register_plan_id, rp_context, + eflags, &changed, rp_context->subplan_order_array)) + return false; + + return changed; +} + +/** + * Rewrite plan subtree starting with plan into VCI plan + * + * @param[in,out] plan_p Pointer to the plan subtree to start rewriting + * @param[in,out] parent parent plan node of plan + * @param[in,out] context 
additional context + * @param[in] eflags flag to be passed to ExecInitNode + * @param[out] changed write true if rewrite is executed + * + * @return true when callback function stops cycle, false if cycle is complete + */ +static bool +vci_rewrite_plan_tree_mutator(Plan **plan_p, Plan *parent, void *context, int eflags, bool *changed) +{ + vci_rewrite_plan_context_t *rp_context = (vci_rewrite_plan_context_t *) context; + Plan *plan; + bool saved_forbid_parallel_exec; + bool result; + + plan = *plan_p; + + saved_forbid_parallel_exec = rp_context->forbid_parallel_exec; + rp_context->forbid_parallel_exec = false; + + if (vci_plan_tree_mutator(plan_p, parent, vci_rewrite_plan_tree_mutator, context, eflags, changed)) + return true; + + result = vci_rewrite_plan_node(plan_p, parent, context, eflags, changed); + + if (rp_context->plan_attr_map[plan->plan_no].plan_compat == VCI_PLAN_COMPAT_OK) + rp_context->plan_attr_map[plan->plan_no].plan_compat = rp_context->forbid_parallel_exec ? VCI_PLAN_COMPAT_UNSUPPORTED_OBJ : VCI_PLAN_COMPAT_OK; + rp_context->forbid_parallel_exec |= saved_forbid_parallel_exec; + + return result; +} + +/** + * Rewrute plan node of *plan_p with VCI plan + * + * @param[in,out] plan_p Pointer to the plan to start rewriting + * @param[in,out] parent parent plan of plan + * @param[in,out] context additional context + * @param[in] eflags flag to be passed to ExecInitNode + * @param[out] changed write true if rewrite is executed + * + * @return true when callback function stops cycle, false if cycle is complete + */ +static bool +vci_rewrite_plan_node(Plan **plan_p, Plan *parent, void *context, int eflags, bool *changed) +{ + vci_rewrite_plan_context_t *rp_context = (vci_rewrite_plan_context_t *) context; + Plan *plan; + AttrNumber scan_plan_no = 0; + + plan = *plan_p; + + /* Determine if Vector processing is possible */ + rp_context->suppress_vp = false; + + if (rp_context->plan_attr_map[plan->plan_no].plan_compat != VCI_PLAN_COMPAT_OK) + { + 
rp_context->forbid_parallel_exec = true; + return false; + } + + /* Determine if there are any expression that cannot be rewritten in plan */ + if (vci_expression_walker(plan, vci_contain_inapplicable_expr_walker, context)) + { + rp_context->forbid_parallel_exec = true; + return false; + } + + switch (nodeTag(plan)) + { + default: + break; + + case T_HashJoin: + { + HashJoin *hjnode = (HashJoin *) plan; + + if (!VciGuc.enable_hashjoin) + return false; + + scan_plan_no = vci_satisfies_vci_join(rp_context, &hjnode->join); + + if (scan_plan_no == 0) + return false; + + elog(DEBUG1, "Replace VCI HashJoin"); + + *changed = true; + + vci_set_inner_plan_type_and_scan_plan_no(rp_context, plan, VCI_INNER_PLAN_TYPE_HASHJOIN, scan_plan_no); + } + break; + + case T_NestLoop: + { + NestLoop *nlnode = (NestLoop *) plan; + + if (!VciGuc.enable_nestloop) + return false; + + scan_plan_no = vci_satisfies_vci_join(rp_context, &nlnode->join); + + if (scan_plan_no == 0) + return false; + + elog(DEBUG1, "Replace VCI NestLoop"); + + *changed = true; + + vci_set_inner_plan_type_and_scan_plan_no(rp_context, plan, VCI_INNER_PLAN_TYPE_NESTLOOP, scan_plan_no); + } + break; + + case T_Sort: + { + Sort *sortnode = (Sort *) plan; + + if (!VciGuc.enable_sort) + return false; + + /* + * Can only be rewritten when outer is VCI + * Scan/HashJoin/NestLoop. VCI Agg cannot be rewritten. Sort + * plan nodes are note consecutive, so VCI Sort will not + * occur. 
+ */ + switch (vci_get_inner_plan_type(rp_context, outerPlan(plan))) + { + case VCI_INNER_PLAN_TYPE_SCAN: + case VCI_INNER_PLAN_TYPE_HASHJOIN: + case VCI_INNER_PLAN_TYPE_NESTLOOP: + /* OK */ + scan_plan_no = vci_get_inner_scan_plan_no(rp_context, outerPlan(plan)); + break; + default: + return false; + } + + Assert(scan_plan_no > 0); + + elog(DEBUG1, "Replace VCI Sort"); + + *plan_p = (Plan *) vci_create_custom_sort(sortnode, scan_plan_no); + *changed = true; + + vci_set_inner_plan_type_and_scan_plan_no(rp_context, plan, VCI_INNER_PLAN_TYPE_SORT, scan_plan_no); + } + break; + + case T_Agg: + { + Agg *aggnode = (Agg *) plan; + + switch (aggnode->aggstrategy) + { + case AGG_SORTED: + if (!VciGuc.enable_sortagg) + return false; + break; + + case AGG_HASHED: + if (!VciGuc.enable_hashagg) + return false; + break; + + case AGG_PLAIN: + if (!VciGuc.enable_plainagg) + return false; + break; + + default: + break; /* LCOV_EXCL_LINE */ + } + + switch (aggnode->aggstrategy) + { + case AGG_SORTED: + if (vci_get_inner_plan_type(rp_context, outerPlan(plan)) != VCI_INNER_PLAN_TYPE_SORT) + return false; + /* OK */ + scan_plan_no = vci_get_inner_scan_plan_no(rp_context, outerPlan(plan)); + break; + + case AGG_HASHED: + case AGG_PLAIN: + switch (vci_get_inner_plan_type(rp_context, outerPlan(plan))) + { + case VCI_INNER_PLAN_TYPE_SCAN: + case VCI_INNER_PLAN_TYPE_HASHJOIN: + case VCI_INNER_PLAN_TYPE_NESTLOOP: + /* OK */ + scan_plan_no = vci_get_inner_scan_plan_no(rp_context, outerPlan(plan)); + break; + default: + return false; + } + break; + + default: + break; /* LCOV_EXCL_LINE */ + } + + Assert(scan_plan_no > 0); + + elog(DEBUG1, "Replace VCI Agg"); + + *plan_p = (Plan *) vci_create_custom_agg(aggnode, scan_plan_no, rp_context->suppress_vp); + *changed = true; + + vci_set_inner_plan_type_and_scan_plan_no(rp_context, plan, VCI_INNER_PLAN_TYPE_AGG, scan_plan_no); + } + break; + + case T_SeqScan: + if (!VciGuc.enable_seqscan) + return false; + else + { + bool each_changed = false; + + 
switch (VciGuc.table_scan_policy) + { + case VCI_TABLE_SCAN_POLICY_COLUMN_ONLY: + if (true == vci_rewrite_scan_node_via_column_store(plan_p, parent, context, &each_changed)) + return false; + break; + + default: + break; + } + + *changed |= each_changed; + } + break; + + case T_IndexScan: + if (!VciGuc.enable_indexscan) + return false; + + if (((IndexScan *) plan)->indexorderdir != NoMovementScanDirection) + { + elog(DEBUG1, "Need sorting rows if indexscan with indexorderdir(%d) is replaced", + ((IndexScan *) plan)->indexorderdir); + return false; + } + goto process_scan_like_plan; + + case T_BitmapHeapScan: + if (!VciGuc.enable_bitmapheapscan) + return false; + + goto process_scan_like_plan; + + process_scan_like_plan: + { + if (VciGuc.table_scan_policy == VCI_TABLE_SCAN_POLICY_COLUMN_ONLY) + if (true == vci_rewrite_scan_node_via_column_store(plan_p, parent, context, changed)) + return false; + } + break; + + case T_CustomScan: + case T_CustomPlanMarkPos: + return false; + } + + return false; +} + +/** + * Get bitmap of parameters updated by SubPlan via initPlan called from + * the given plan. 
+ */ +static bool +vci_rewrite_scan_node_via_column_store(Plan **plan_p, Plan *parent, void *context, bool *changed) +{ + vci_rewrite_plan_context_t *rp_context = (vci_rewrite_plan_context_t *) context; + Plan *plan = *plan_p; + Scan *scannode = (Scan *) plan; + vci_table_info_t table_info; + List *tlist = NIL; + List *qual = NIL; + AttrNumber scan_plan_no; + + if (rp_context->plan_attr_map[plan->plan_no].preset_eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)) + return false; + + table_info.reloid = getrelid(scannode->scanrelid, rp_context->plannedstmt->rtable); + table_info.indexOid = InvalidOid; + table_info.attrs_used = NULL; + + tlist = scannode->plan.targetlist; + qual = vci_reconstruct_qualification(scannode); + + scan_plan_no = plan->plan_no; + + if (nodeTag(plan) != T_SeqScan) + { + if (expression_tree_walker((Node *) qual, vci_contain_nestloop_param_expr_walker, context)) + { + elog(DEBUG1, "Scan's qual contains any inapplicable expression"); + return false; + } + } + + /* + * Determines whether VCI index containes attributes accessed by the + * query, and if so returns the OID of the VCI index and bitmapset of + * attributes accessed in the query. + */ + if (!vci_can_rewrite_custom_scan(scannode, tlist, qual, parent, &table_info)) + return false; + + tlist = scannode->plan.targetlist; + + elog(DEBUG1, "Replace VCI Scan [column store]: convert from %s", + VciGetNodeName(nodeTag(plan))); + + *plan_p = (Plan *) vci_create_custom_scan_via_column_store(scannode, &table_info, tlist, qual, rp_context->suppress_vp); + *changed = true; + + vci_set_inner_plan_type_and_scan_plan_no(rp_context, plan, VCI_INNER_PLAN_TYPE_SCAN, scan_plan_no); + + return false; +} + +/** + * Insert Material node into the tree that has already been rewritten to VCI plan node + * as necessary. 
+ * + * @param[in,out] plan_p rewritten plan tree + * @param[in,out] parent parent plan of plan + * @param[in,out] context additional context + * @param[in] eflags flag to be passed to ExecInitNode + * @param[out] changed write true if rewrite is executed + * + * @return true when callback function stop cycle, false if cycle is complete + * + * None of the VCI plan nodes support mark/restore, backward scan, or rewind (efficient scan). + * If they are needed, insert a Materialnode above them to handle. + */ +static bool +vci_insert_material_node_mutator(Plan **plan_p, Plan *parent, void *context, int eflags, bool *changed) +{ + vci_rewrite_plan_context_t *rp_context = (vci_rewrite_plan_context_t *) context; + Material *newplan; + VciPlan *targetplan; + Plan *plan; + + plan = *plan_p; + + switch (nodeTag(plan)) + { + case T_CustomScan: + case T_CustomPlanMarkPos: + switch (vci_get_inner_plan_type(rp_context, plan)) + { + case VCI_INNER_PLAN_TYPE_SORT: + + /* + * VCI Sort node does not support EXEC_FLAG_BACKWARD and + * EXEC_FLAG_MARK, so insert a Material node between them. 
+ * + * VCI Sort can be used if only EXEC_FLAG_REWIND + */ + if ((eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)) != 0) + { + targetplan = (VciPlan *) plan; + goto maybe_need_material_node; + } + break; + + case VCI_INNER_PLAN_TYPE_SCAN: + case VCI_INNER_PLAN_TYPE_AGG: + Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); + /* pass through */ + /* pgr0007 */ + + default: + break; + } + break; + + case T_Limit: + if (outerPlan(plan) && (vci_get_inner_plan_type(rp_context, outerPlan(plan)) == VCI_INNER_PLAN_TYPE_SORT)) + { + if ((eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)) != 0) + { + targetplan = (VciPlan *) outerPlan(plan); + goto maybe_need_material_node; + } + } + break; + + default: + break; + } + + if (vci_plan_tree_mutator(plan_p, parent, vci_insert_material_node_mutator, context, eflags, changed)) + return true; + + return false; + +maybe_need_material_node: + newplan = makeNode(Material); + + newplan->plan.targetlist = vci_generate_pass_through_target_list(plan->targetlist); + newplan->plan.qual = NIL; + newplan->plan.lefttree = plan; + newplan->plan.plan_no = ++rp_context->last_plan_no; + vci_expand_plan_attr_map(rp_context); + + copy_plan_costsize(&newplan->plan, plan); + + newplan->plan.extParam = bms_copy(plan->extParam); + newplan->plan.allParam = bms_copy(plan->allParam); + + newplan->plan.initPlan = plan->initPlan; + plan->initPlan = NULL; + + *plan_p = (Plan *) newplan; + *changed = true; + + rp_context->plan_attr_map[targetplan->cscan.scan.plan.plan_no].preset_eflags &= ~(EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK); + + return false; +} + +/** + * Create VCI Sort node + */ +static VciSort * +vci_create_custom_sort(Sort *sortnode, AttrNumber scan_plan_no) +{ + VciSort *sort; + + sort = palloc0_object(VciSort); + + sort->vci.cscan.scan.plan = sortnode->plan; + sort->vci.cscan.scan.plan.type = T_CustomPlanMarkPos; /* Mark restore support */ + sort->vci.cscan.flags = VCI_CUSTOMPLAN_SORT | CUSTOMPATH_SUPPORT_BACKWARD_SCAN | 
CUSTOMPATH_SUPPORT_MARK_RESTORE; + sort->vci.cscan.custom_relids = NULL; + sort->vci.cscan.methods = &vci_sort_scan_methods; + + sort->vci.scan_plan_no = scan_plan_no; + sort->vci.orig_plan = (Plan *) sortnode; + + sort->numCols = sortnode->numCols; + sort->sortColIdx = sortnode->sortColIdx; + sort->sortOperators = sortnode->sortOperators; + sort->collations = sortnode->collations; + sort->nullsFirst = sortnode->nullsFirst; + + return sort; +} + +/** + * Create VCI Agg node + */ +static VciAgg * +vci_create_custom_agg(Agg *aggnode, AttrNumber scan_plan_no, bool suppress_vp) +{ + VciAgg *agg; + + agg = palloc0_object(VciAgg); + + agg->vci.cscan.scan.plan = aggnode->plan; + agg->vci.cscan.scan.plan.type = T_CustomScan; /* Not mark restore + * support */ + agg->vci.cscan.flags = VCI_CUSTOMPLAN_AGG; + agg->vci.cscan.custom_relids = NULL; + + switch (aggnode->aggstrategy) + { + case AGG_HASHED: + agg->vci.cscan.methods = &vci_hashagg_scan_methods; + break; + + case AGG_SORTED: + agg->vci.cscan.methods = &vci_groupagg_scan_methods; + break; + + case AGG_PLAIN: + agg->vci.cscan.methods = &vci_agg_scan_methods; + break; + + default: + break; /* LCOV_EXCL_LINE */ + } + + agg->vci.scan_plan_no = scan_plan_no; + agg->vci.orig_plan = (Plan *) aggnode; + + agg->aggstrategy = aggnode->aggstrategy; + agg->numCols = aggnode->numCols; + agg->grpColIdx = aggnode->grpColIdx; + agg->grpOperators = aggnode->grpOperators; + agg->grpCollations = aggnode->grpCollations; + agg->numGroups = aggnode->numGroups; + + return agg; +} + +/** + * Determine if Scan plan node can be rewritten to VCI Scan + * + * Checks that there is a VCI index with all columns (attributes) to be read + * from the table. If there is more than one matching VCI index, the earlier one + * in the index list is used. 
 *
 * @param[in] scannode target Scan plan node (its scanrelid identifies the table)
 * @param[in] targetlist targetlist of target Scan plan
 * @param[in] qual qual of target Scan plan
 * @param[in] parent Parent plan node of target Scan plan
 * @param[in,out] table_info Input information about target table and returns information obtained within this function
 *
 * @retval true if rewriteable, false if not
 */
static bool
vci_can_rewrite_custom_scan(Scan *scannode, List *targetlist, List *qual, Plan *parent, vci_table_info_t *table_info)
{
	Index		scanrelid;
	vci_gather_used_attrs_t gcontext;
	int			orig_natts,
				opt_natts;
	Relation	tableRel;
	double		estimate_tuples;
	Oid			foundVciIndexOid = InvalidOid;
	Bitmapset  *attrs_used = NULL;
	bool		do_minimize_tlist = false;
	Bitmapset  *attrs_used_from_parent = NULL;
	List	   *indexoidlist = NIL;
	ListCell   *indexoidscan;
	Index		parent_refer_relid = 0;

	scanrelid = scannode->scanrelid;

	gcontext.scanrelid = scanrelid;
	gcontext.attrs_used = NULL;

	/* Collect every attribute the Scan node itself references */
	if (expression_tree_walker((Node *) qual, vci_gather_used_attrs, &gcontext) ||
		expression_tree_walker((Node *) targetlist, vci_gather_used_attrs, &gcontext))
		return false;

	attrs_used = gcontext.attrs_used;

	orig_natts = opt_natts = bms_num_members(attrs_used);

	if (orig_natts == 0)
		return false;

	/* Determine which side of the parent refers to this Scan node */
	if (parent)
	{
		if ((Plan *) scannode == outerPlan(parent))
			parent_refer_relid = OUTER_VAR;
		else if ((Plan *) scannode == innerPlan(parent))
			parent_refer_relid = INNER_VAR;
	}

	/*
	 * To improve the read performance of SeqScan, PostgreSQL may sort the
	 * target list according to the order of columns in the heap tuple,
	 * including columns that are not actually referenced by the upper node.
	 *
	 * In a columnar system, such optimizations are harmful, so optimizations
	 * are needed to stop reading unnecessary columns.
	 *
	 * First, calculate the columns that are truly referenced from the upper
	 * node.
	 *
	 * This optimization only looks at the next higher node. Hash does not
	 * work because it works in conjunction with HashJoin, which is even
	 * higher up.
	 */
	if ((parent_refer_relid != 0) && vci_tlist_consists_of_only_simple_vars(targetlist, scanrelid))
	{
		switch (nodeTag(parent))
		{
			case T_Agg:
			case T_Group:
			case T_HashJoin:
			case T_MergeJoin:
			case T_NestLoop:
				attrs_used_from_parent = vci_gather_used_attrs_in_plan(parent, parent_refer_relid);
				do_minimize_tlist = true;
				break;
			default:
				break;
		}
	}

	if (do_minimize_tlist)
	{
		Bitmapset  *new_attrs_used;

		gcontext.scanrelid = scanrelid;
		gcontext.attrs_used = NULL;

		/* Re-collect only the qual's attributes; targetlist is replaceable */
		expression_tree_walker((Node *) qual, vci_gather_used_attrs, &gcontext);

		new_attrs_used = bms_add_members(gcontext.attrs_used, attrs_used_from_parent);

		/*
		 * Compare the attributes referenced by Scan with the attributes
		 * referenced by the WHERE clause and the attributes referenced by the
		 * parent node.
		 */
		if (bms_equal(attrs_used, new_attrs_used))
		{
			bms_free(new_attrs_used);
			bms_free(attrs_used_from_parent);

			attrs_used_from_parent = NULL;
			do_minimize_tlist = false;
		}
		else
		{
			bms_free(attrs_used);

			attrs_used = new_attrs_used;

			opt_natts = bms_num_members(attrs_used);
		}
	}

	/*
	 * Lock table for index calculation
	 */
	tableRel = table_open(table_info->reloid, AccessShareLock);

	estimate_tuples = (double) Max(tableRel->rd_rel->reltuples, 0);

	elog(DEBUG1, "vci index: target table \"%s\"(oid=%u) tuples(rows=%.0f,extents=%u)",
		 NameStr(tableRel->rd_rel->relname), table_info->reloid,
		 estimate_tuples, (int) (estimate_tuples / VCI_NUM_ROWS_IN_EXTENT));

	/* Small tables are not worth a columnar scan */
	if (estimate_tuples < (double) VciGuc.table_rows_threshold)
	{
		elog(DEBUG1, "vci index: target table \"%s\"(oid=%u) is too few rows. threshold=%d",
			 NameStr(tableRel->rd_rel->relname), table_info->reloid, VciGuc.table_rows_threshold);

		goto done;
	}

	/*
	 * Find the VCI index from the indexes existing in the table and check
	 * whether the table contains attrs_used.
	 */
	indexoidlist = RelationGetIndexList(tableRel);

	foreach(indexoidscan, indexoidlist)
	{
		Relation	indexRel;
		Oid			indexOid;

		indexOid = lfirst_oid(indexoidscan);
		indexRel = index_open(indexOid, AccessShareLock);

		if (isVciIndexRelation(indexRel))
		{
			Form_pg_index indexStruct = indexRel->rd_index;
			Bitmapset  *attrs_indexed;

			/*
			 * If the index is valid, but cannot yet be used, ignore it. (See
			 * L.190 src/backend/optimizer/util/plancat.c) See
			 * src/backend/access/heap/README.HOT for discussion.
			 */
			if (indexStruct->indcheckxmin &&
				!TransactionIdPrecedes(HeapTupleHeaderGetXmin(indexRel->rd_indextuple->t_data),
									   TransactionXmin))
			{
				index_close(indexRel, AccessShareLock);
				continue;
			}

			attrs_indexed = vci_MakeIndexedColumnBitmap(indexOid,
														CurrentMemoryContext,
														AccessShareLock);

			if (bms_is_subset(attrs_used, attrs_indexed))
			{
				elog(DEBUG1, "vci index: adopt index \"%s\"(oid=%u)",
					 NameStr(indexRel->rd_rel->relname), indexOid);

				foundVciIndexOid = indexOid;
			}
			else
			{
				int			num,
							x;

				elog(DEBUG1, "vci index: don't match index \"%s\"(oid=%u)",
					 NameStr(indexRel->rd_rel->relname), indexOid);

				/* Dump referenced attrs for debugging; "x" marks indexed ones */
				num = bms_num_members(attrs_used);
				x = 1;
				while (num > 0)
				{
					if (bms_is_member(x, attrs_used))
					{
						elog(DEBUG1, "\tattrnum = %d%s", x, bms_is_member(x, attrs_indexed) ? " x" : "");
						num--;
					}
					x++;
				}
			}

			bms_free(attrs_indexed);
		}

		index_close(indexRel, AccessShareLock);

		/* First matching VCI index wins */
		if (OidIsValid(foundVciIndexOid))
			break;
	}

	list_free(indexoidlist);

done:
	table_close(tableRel, AccessShareLock);

	if (OidIsValid(foundVciIndexOid))
	{
		if (do_minimize_tlist)
		{
			elog(DEBUG1, "vci index: minimize targetlist %d -> %d handing over %s",
				 orig_natts, opt_natts, VciGetNodeName(nodeTag(parent)));

			vci_minimize_tlist_of_scan(scannode, parent, parent_refer_relid, attrs_used_from_parent);
			bms_free(attrs_used_from_parent);
		}

		/* Hand the findings back to the caller through table_info */
		table_info->indexOid = foundVciIndexOid;
		table_info->estimate_tuples = estimate_tuples;
		table_info->attrs_used = attrs_used;
	}
	else
	{
		bms_free(attrs_used);
	}

	return OidIsValid(foundVciIndexOid);
}

/**
 * Determine if the given target list consists only of Simple Vars
 * referencing a single input tuple.
 */
static bool
vci_tlist_consists_of_only_simple_vars(List *tlist, Index scanrelid)
{
	ListCell   *tl;
	Index		attno = 1;

	foreach(tl, tlist)
	{
		TargetEntry *tle;
		Var		   *var;

		tle = (TargetEntry *) lfirst(tl);

		if (!tle->expr || !IsA(tle->expr, Var))
			return false;

		var = (Var *) tle->expr;

		if (var->varno != scanrelid)
			return false;

		/* Entries must be attnos 1..N in order, with no gaps */
		if (var->varattno != attno)
			return false;

		attno++;
	}

	return true;
}

/**
 * Collect the bitmap of attributes referenced as scanrelid within the specified plan node.
 */
static Bitmapset *
vci_gather_used_attrs_in_plan(Plan *plan, Index scanrelid)
{
	vci_gather_used_attrs_t gcontext;

	gcontext.scanrelid = scanrelid;
	gcontext.attrs_used = NULL;

	if (vci_expression_and_colid_walker(plan, vci_gather_used_attrs, vci_gather_one_used_attr, &gcontext))
	{
		bms_free(gcontext.attrs_used);
		return NULL;
	}

	return gcontext.attrs_used;
}

/**
 * Scan Var node in the VCI Scan node and obtain the attrno of attributes
 * that require data supply from the VCI index.
+ */ +static bool +vci_gather_used_attrs(Node *node, void *context) +{ + vci_gather_used_attrs_t *gcontext = (vci_gather_used_attrs_t *) context; + + if (node == NULL) + return false; + + switch (nodeTag(node)) + { + case T_Var: + { + Var *var = (Var *) node; + + if (gcontext->scanrelid != var->varno) + return false; + + gcontext->attrs_used = bms_add_member(gcontext->attrs_used, var->varattno); + } + return false; + + default: + break; + } + + return expression_tree_walker(node, vci_gather_used_attrs, context); +} + +/** + * Records to vci_gather_used_attrs_t because it is *attr_p attribute going to be referenced + */ +static void +vci_gather_one_used_attr(AttrNumber *attr_p, void *context) +{ + vci_gather_used_attrs_t *gcontext = (vci_gather_used_attrs_t *) context; + + Assert(*attr_p > 0); + + gcontext->attrs_used = bms_add_member(gcontext->attrs_used, *attr_p); +} + +/** + * Delete nodes in targetlist of Scan node that are not referenced by higher-level nodes. + * At the same time, change the attno of outer var or inner var within higher-leve nodes. 
 */
static void
vci_minimize_tlist_of_scan(Scan *scannode, Plan *parent, Index parent_refer_relid, Bitmapset *attrs_used_from_parent)
{
	vci_renumber_attrs_t rcontext;
	AttrNumber	last_attr;
	int			j;
	AttrNumber	resno;
	List	   *tlist;
	List	   *new_tlist = NIL;
	ListCell   *lc;

	tlist = scannode->plan.targetlist;

	last_attr = list_length(tlist);

	/*
	 * Build an old->new attribute-number map: surviving attributes are
	 * renumbered consecutively from 1; dropped ones keep 0 (palloc0).
	 */
	rcontext.scanrelid = parent_refer_relid;
	rcontext.attr_map = palloc0_array(AttrNumber, (last_attr + 1));

	j = 1;
	for (int i = 1; i <= last_attr; i++)
		if (bms_is_member(i, attrs_used_from_parent))
			rcontext.attr_map[i] = j++;

	/* Rewrite all OUTER_VAR/INNER_VAR references in the parent node */
	if (vci_expression_and_colid_walker(parent, vci_renumber_attrs, vci_renumber_one_attr, &rcontext))
		elog(ERROR, "planner failed to minimize tlist of scan");

	/* Keep only referenced target entries, renumbering resno sequentially */
	resno = 1;
	new_tlist = NIL;
	foreach(lc, tlist)
	{
		TargetEntry *tle = (TargetEntry *) lfirst(lc);

		Assert(IsA(tle, TargetEntry));

		if (rcontext.attr_map[tle->resno] > 0)
		{
			tle->resno = resno++;
			new_tlist = lappend(new_tlist, tle);
		}
	}

	pfree(rcontext.attr_map);

	scannode->plan.targetlist = new_tlist;
}

/**
 * Renumber attribute numbers in the subtree under the specified expression node.
 */
static bool
vci_renumber_attrs(Node *node, void *context)
{
	vci_renumber_attrs_t *rcontext = (vci_renumber_attrs_t *) context;

	if (node == NULL)
		return false;

	switch (nodeTag(node))
	{
		case T_Var:
			{
				Var		   *var = (Var *) node;

				if (rcontext->scanrelid != var->varno)
					return false;

				/* Whole-row/system attributes cannot be renumbered: abort walk */
				if (var->varattno <= InvalidAttrNumber)
					return true;

				var->varattno = rcontext->attr_map[var->varattno];
			}
			return false;

		default:
			break;
	}

	return expression_tree_walker(node, vci_renumber_attrs, context);
}

/**
 * Renumber attribute number located at the position of attr_p
 */
static void
vci_renumber_one_attr(AttrNumber *attr_p, void *context)
{
	vci_renumber_attrs_t *rcontext = (vci_renumber_attrs_t *) context;

	Assert(*attr_p > 0);

	*attr_p = rcontext->attr_map[*attr_p];
}

/**
 * Combine qual when Scan derived node returned to SeqScan node
 *
 * Index and bitmap scans keep part of the predicate in their "orig"
 * qual fields; fold those back into a single qual list.
 */
static List *
vci_reconstruct_qualification(Scan *scannode)
{
	List	   *qual = scannode->plan.qual;

	switch (nodeTag(scannode))
	{
		case T_SeqScan:
			qual = list_copy(qual);
			break;

		case T_IndexScan:
			qual = list_copy(qual);
			if (((IndexScan *) scannode)->indexqualorig)
				qual = list_concat(qual, ((IndexScan *) scannode)->indexqualorig);
			break;

		case T_BitmapHeapScan:
			qual = list_copy(qual);
			if (((BitmapHeapScan *) scannode)->bitmapqualorig)
				qual = list_concat(qual, ((BitmapHeapScan *) scannode)->bitmapqualorig);
			break;

		default:
			Assert(0);
			break;
	}

	return qual;
}

/**
 * Create VCI Scan node
 */
static VciScan *
vci_create_custom_scan_via_column_store(Scan *scannode, const vci_table_info_t *table_info, List *tlist, List *qual, bool suppress_vp)
{
	VciScan    *scan;

	scan = palloc0_object(VciScan);

	scan->vci.cscan.scan.plan = scannode->plan;
	scan->vci.cscan.scan.plan.parallel_aware = false;
	scan->vci.cscan.scan.plan.type = T_CustomPlanMarkPos;

	scan->vci.cscan.scan.plan.targetlist = tlist;

	scan->vci.cscan.scan.plan.qual = qual;

	scan->vci.cscan.scan.scanrelid = scannode->scanrelid;

	scan->vci.cscan.flags = VCI_CUSTOMPLAN_SCAN | CUSTOMPATH_SUPPORT_MARK_RESTORE;
	scan->vci.cscan.custom_relids = bms_make_singleton(scannode->scanrelid);
	scan->vci.cscan.methods = &vci_scan_scan_methods;

	scan->vci.scan_plan_no = scan->vci.cscan.scan.plan.plan_no;
	scan->vci.orig_plan = (Plan *) scannode;

	/* Copy table/index facts gathered by vci_can_rewrite_custom_scan() */
	scan->scan_mode = VCI_SCAN_MODE_COLUMN_STORE;
	scan->scanrelid = scannode->scanrelid;
	scan->reloid = table_info->reloid;
	scan->indexoid = table_info->indexOid;
	scan->attr_used = table_info->attrs_used;
	scan->num_attr_used = bms_num_members(table_info->attrs_used);
	scan->estimate_tuples = table_info->estimate_tuples;
	scan->is_all_simple_vars = vci_tlist_consists_of_only_simple_vars(tlist, scannode->scanrelid);

	/*
	 * NOTE(review): suppress_vp is not referenced in this function body —
	 * confirm whether that is intentional or leftover.
	 */
	return scan;
}

/**
 * Return true when expression node that cannot be executed in custom plan is detected,
 * false if they are all custom plan applicable
 */
static bool
vci_contain_inapplicable_expr_walker(Node *node, void *context)
{
	vci_rewrite_plan_context_t *rp_context = (vci_rewrite_plan_context_t *) context;

	Assert(context);

	if (node == NULL)
		return false;

	switch (nodeTag(node))
	{
		case T_Var:
			{
				Var		   *var = (Var *) node;

				/* varattno == InvalidAttrNumber means it's a whole-row Var */
				if (var->varattno == InvalidAttrNumber)
					return true;

				/*
				 * varattno < InvalidAttrNumber means it's a system-defined
				 * attribute
				 */
				else if (var->varattno < InvalidAttrNumber)
					return true;
			}
			break;

		case T_FuncExpr:
			{
				FuncExpr   *expr = (FuncExpr *) node;

				if (expr->funcretset)
				{
					elog(DEBUG1, "FuncExpr contains returning-set function");
					return true;
				}

				if (expr->funcvariadic)
				{
					elog(DEBUG1, "FuncExpr contains funcvariadic");
					return true;
				}

				if (!vci_is_supported_function(expr->funcid))
				{
					elog(DEBUG1, "FuncExpr contains not-supported function: oid=%d", expr->funcid);
					return true;
				}

				if (!vci_is_not_user_defined_type(expr->funcresulttype))
				{
					elog(DEBUG1, "FuncExpr contains user defined type: oid=%d", expr->funcresulttype);
					return true;
				}

				/*
				 * Always returns true here to create vci_runs_in_plan()
				 * result. Overwrite to function.
				 */
				if (expr->funcid == vci_special_udf_info.vci_runs_in_plan_funcoid)
					expr->funcid = vci_special_udf_info.vci_always_return_true_funcoid;
			}
			break;

		case T_OpExpr:
		case T_DistinctExpr:	/* struct-equivalent to OpExpr */
		case T_NullIfExpr:		/* struct-equivalent to OpExpr */
			{
				OpExpr	   *expr = (OpExpr *) node;

				if (expr->opretset)
				{
					elog(DEBUG1, "%s contains returning-set function", VciGetNodeName(nodeTag(node)));
					return true;
				}

				if (!vci_is_supported_operation(expr->opfuncid))
				{
					elog(DEBUG1, "%s contains not-supported operation: oid=%d", VciGetNodeName(nodeTag(node)), expr->opfuncid);
					return true;
				}

				if (!vci_is_not_user_defined_type(expr->opresulttype))
				{
					elog(DEBUG1, "%s contains user defined type: oid=%d", VciGetNodeName(nodeTag(node)), expr->opresulttype);
					return true;
				}
			}
			break;

		case T_Param:
			{
				Param	   *param = (Param *) node;
				int			paramid = param->paramid;

				/* Not support PARAM_EXTERN or PARAM_SUBLINK */
				if (param->paramkind != PARAM_EXEC)
				{
					elog(DEBUG1, "Param contains extern or sublink");
					return true;
				}

				/*
				 * Check Param defined or referenced by multiple plan node
				 */
				switch (rp_context->param_exec_attr_map[paramid].type)
				{
					case VCI_PARAM_EXEC_NESTLOOP:
						/* VCI compatible, for calls via NestLoop */
						break;

					case VCI_PARAM_EXEC_INITPLAN:
					case VCI_PARAM_EXEC_SUBPLAN:

						/*
						 * not VCI compatible, for calls via initPlan or
						 * SubPlan
						 */
						if (rp_context->param_exec_attr_map[paramid].num_def_plans > 1)
						{
							elog(DEBUG1, "Param contains multi defining plans");
							return true;
						}

						if (rp_context->param_exec_attr_map[paramid].num_use_plans > 1)
						{
							elog(DEBUG1, "Param contains multi referencing plans");
							return true;
						}
						break;

						/* LCOV_EXCL_START */
					default:

						/*
						 * Commenting out below code as there is possibility
						 * to reach here when optimizer optimizes the plan to
						 * remove subplan node itself. E.g: Create view V1 as
						 * SELECT *, (SELECT d FROM t11 WHERE t11.a = t1.a
						 * LIMIT 1) AS d FROM t1 WHERE a > 5; and run SELECT *
						 * FROM v1 where a=3;
						 */
						/* elog(PANIC, "Should not reach here."); */
						break;
						/* LCOV_EXCL_STOP */
				}
			}
			break;

		case T_Const:
		case T_List:
			break;

		case T_Aggref:
			{
				Aggref	   *aggref = (Aggref *) node;

				/* Not support ordered-set or hypothetical */
				if (aggref->aggkind != AGGKIND_NORMAL)
				{
					elog(DEBUG1, "Aggref contains %c", aggref->aggkind);
					return true;
				}

				/* Not support polymorphic and variadic aggregation */
				if (aggref->aggvariadic)
				{
					elog(DEBUG1, "Aggref contains variadic aggregation");
					return true;
				}

				/* Not support FILTER expression */
				if (aggref->aggfilter != NULL)
				{
					elog(DEBUG1, "Aggref contains FILTER expression");
					return true;
				}

				/* Not support DISTINCT */
				if (aggref->aggdistinct != NIL)
				{
					elog(DEBUG1, "Aggref contains DISTINCT");
					return true;
				}

				/* Not support ORDER BY */
				if (aggref->aggorder != NIL)
				{
					elog(DEBUG1, "Aggref contains ORDER BY");
					return true;
				}

				/* Not support user-defined aggregation */
				if (!vci_is_supported_aggregation(aggref))
					return true;
			}
			break;

		case T_ScalarArrayOpExpr:
			break;

		case T_BoolExpr:
			break;

		case T_RelabelType:
		case T_CoalesceExpr:
		case T_MinMaxExpr:
			break;
		case T_NullTest:
			{
				NullTest   *ntest = (NullTest *) node;

				if (ntest->argisrow)
				{
					elog(DEBUG1, "NullTest contains row-format");
					return true;
				}
			}
			break;

		case T_BooleanTest:
		case T_TargetEntry:
			break;

		case T_CoerceViaIO:
			break;

		case T_CaseExpr:
		case T_CaseTestExpr:
			break;

		case T_SubPlan:
			return true;

		case T_ArrayExpr:
		case T_ArrayCoerceExpr:
		case T_ConvertRowtypeExpr:
		case T_RowExpr:
		case T_RowCompareExpr:
		case T_SubscriptingRef:
		case T_WindowFunc:
		case T_XmlExpr:
		case T_WindowClause:
		case T_CommonTableExpr:
		case T_FieldSelect:
		case T_FieldStore:
		case T_RangeTblFunction:
		case T_AlternativeSubPlan:
		case T_SetOperationStmt:
		case T_AppendRelInfo:
		case T_WithCheckOption:	/* inserting/updating an auto-updatable view */
		case T_CurrentOfExpr:	/* CURRENT OF cursor_name */
		case T_CoerceToDomain:
		case T_CoerceToDomainValue:
		case T_GroupingFunc:
		case T_SQLValueFunction:
		case T_NextValueExpr:
			return true;

		case T_Query:
		case T_FromExpr:
		case T_JoinExpr:
		case T_PlaceHolderVar:
		case T_PlaceHolderInfo:
		case T_CollateExpr:
		case T_SubLink:
		case T_RangeTblRef:
		case T_SortGroupClause:
		case T_NamedArgExpr:
		case T_SetToDefault:	/* a DEFAULT marker in an INSERT or UPDATE
								 * command */
			return true;		/* LCOV_EXCL_LINE */

		default:
			/* LCOV_EXCL_START */
			elog(ERROR, "unrecognized node type: %s(%d)",
				 VciGetNodeName(nodeTag(node)), (int) nodeTag(node));
			break;
			/* LCOV_EXCL_STOP */
	}

	return expression_tree_walker(node, vci_contain_inapplicable_expr_walker, context);
}

/**
 * Returns true if it references Param defined in NestLoop
 */
static bool
vci_contain_nestloop_param_expr_walker(Node *node, void *context)
{
	vci_rewrite_plan_context_t *rp_context = (vci_rewrite_plan_context_t *) context;

	if (node == NULL)
		return false;

	if (nodeTag(node) == T_Param)
	{
		Param	   *param = (Param *) node;
		int			paramid = param->paramid;

		if (rp_context->param_exec_attr_map[paramid].type == VCI_PARAM_EXEC_NESTLOOP)
		{
			elog(DEBUG1, "Param contains non-permitted paramId");
			return true;
		}
	}

	return expression_tree_walker(node, vci_contain_nestloop_param_expr_walker, context);
}

/*==========================================================================*/
/* Determine if function/type is supported in VCI */
/*==========================================================================*/

/**
 * Determine if Join is supported
 *
 * @param[in] jointype Join type
 * @return true if supported, false if not
 */
bool
vci_is_supported_jointype(JoinType jointype)
{
	switch (jointype)
	{
		case JOIN_INNER:
		case JOIN_SEMI:
		case JOIN_ANTI:
		case JOIN_LEFT:
			return true;

		case JOIN_RIGHT:
		case JOIN_FULL:
		default:
			return false;
	}
}

/**
 * Determine whether Join plan node can be incorporated into parallel plan group.
 *
 * @param[in] rp_context Plan rewrite context
 * @param[in] join Target Join plan node
 *
 * @return 0 if cannot be incorporated, return plan_no of the VCI Scan that will
 * result in partitioned table
 */
static AttrNumber
vci_satisfies_vci_join(vci_rewrite_plan_context_t *rp_context, Join *join)
{
	Plan	   *outer,
			   *inner;

	if (!vci_is_supported_jointype(join->jointype))
		return 0;

	outer = outerPlan(join);
	inner = innerPlan(join);

	/* Both subtrees must be fully parallel-executable */
	if (rp_context->plan_attr_map[outer->plan_no].plan_compat != VCI_PLAN_COMPAT_OK)
	{
		elog(DEBUG1, "Join's outer subtree contains not-parallel-executable plannode");
		return 0;
	}

	if (rp_context->plan_attr_map[inner->plan_no].plan_compat != VCI_PLAN_COMPAT_OK)
	{
		elog(DEBUG1, "Join's inner subtree contains not-parallel-executable plannode");
		return 0;
	}

	/*
	 * Check if outer can be used as partitioned table
	 *
	 * Rewriteable only when VCI Scan/HashJoin/NestLoop. Not rewriteable for
	 * VCI Sort/VCI Agg.
	 */
	switch (vci_get_inner_plan_type(rp_context, outer))
	{
		case VCI_INNER_PLAN_TYPE_SCAN:
		case VCI_INNER_PLAN_TYPE_HASHJOIN:
		case VCI_INNER_PLAN_TYPE_NESTLOOP:
			/* OK */
			return vci_get_inner_scan_plan_no(rp_context, outer);

		default:
			break;
	}

	/*
	 * If outer cannot be used as partitioned table, try to use inner.
	 * However, inner-side is generally unsuitable for partitioned table, so
	 * stricter restrictions are imposed than on outer.
	 */

	/* Only the Hash-under-HashJoin shape is considered for the inner side */
	if (nodeTag(inner) == T_Hash)
		inner = outerPlan(inner);
	else
		return 0;

	if ((inner == NULL) || (join->jointype != JOIN_INNER))
		return 0;

	if (inner->plan_rows < (double) VciGuc.table_rows_threshold)
		return 0;

	/*
	 * outer-side should be less than threshold
	 *
	 * This restriction is imposed because performance deteriorates when a
	 * partitioned table is established on the inner-side when the outer side
	 * is too large.
	 */
	if ((double) VciGuc.table_rows_threshold <= outer->plan_rows)
		return 0;

	if ((vci_get_inner_plan_type(rp_context, inner) == VCI_INNER_PLAN_TYPE_SCAN) &&
		(inner->allParam == NULL))
	{
		switch (nodeTag(outer))
		{
			case T_SeqScan:
			case T_BitmapHeapScan:

			case T_IndexScan:
				/* OK */
				return vci_get_inner_scan_plan_no(rp_context, inner);

			default:
				break;
		}
	}

	return 0;
}

/**
 * Determine whether the given oid is an operation supported by VCI
 *
 * Built-in objects have OIDs below FirstNormalObjectId; anything at or
 * above it is user-defined and therefore unsupported.
 */
static bool
vci_is_supported_operation(Oid oid)
{
	return oid < FirstNormalObjectId;
}

/**
 * Determine whether the given oid is not user defined type
 */
static bool
vci_is_not_user_defined_type(Oid oid)
{
	return oid < FirstNormalObjectId;
}

/*==========================================================================*/
/* Register map of Plan on SMC and Plan State on backend */
/*==========================================================================*/

/*==========================================================================*/
/* Implementation of PG function to check VCI execution */
/*==========================================================================*/

PG_FUNCTION_INFO_V1(vci_runs_in_query);
PG_FUNCTION_INFO_V1(vci_runs_in_plan);
PG_FUNCTION_INFO_V1(vci_always_return_true);

/**
 * PG function that returns whether query is being executed by VCI
 *
 * @param[in] PG_FUNCTION_ARGS Pointer to data struct passed to PG function
 * @return true if VCI is running, false if not
 */
Datum
vci_runs_in_query(PG_FUNCTION_ARGS)
{
	return BoolGetDatum(vci_is_processing_custom_plan());
}

/**
 * PG function that returns whether the plan node containing this function call is VCI plan node
 *
 * @param[in] PG_FUNCTION_ARGS Pointer to data struct passed to PG function
 * @return always false
 */
Datum
vci_runs_in_plan(PG_FUNCTION_ARGS)
{
	return BoolGetDatum(false);
}

/**
 * Function that always returns true
 *
 * @param[in] PG_FUNCTION_ARGS Pointer to data struct passed to PG function
 * @return always true
 *
 * The vci_runs_in_plan function in the query is overridden by this function,
 * which always returns true if the plan rewrite determines that a VCI plan node is connected.
 */
Datum
vci_always_return_true(PG_FUNCTION_ARGS)
{
	return BoolGetDatum(true);
}

/*
 * This function is used to update the plan tree by removing
 * the gather plan from the tree and adjust the targetlist
 * in custom_vci_plan based on the partial_plan and gather_plan.
 */
static void
vci_update_plan_tree(PlannedStmt *plannedstmt)
{
	Plan	   *plan = NULL;
	List	   *newsubplans = NIL;

	father_gather_plans plans;

	memset(&plans, 0, sizeof(father_gather_plans));

	if (plannedstmt->planTree)
	{
		plan = plannedstmt->planTree;

		if (nodeTag(plan) == T_Gather || nodeTag(plan) == T_GatherMerge)
		{
			/* Splice the Gather node out of the top of the tree */
			plannedstmt->planTree = plan->lefttree;
			plans.gather_plan = plan;

			/*
			 * The targetlist of the Gather/GatherMerge node and the
			 * underlying node should be the same (this is enforced in
			 * preanalyze_plan_tree_mutator()). However, the
			 * Gather/GatherMerge node may have additional information that
			 * needs to be retained (by the underlying node) once it is
			 * removed.
			 */
			vci_update_target_list(plannedstmt->planTree, plan);
		}
		/*
		 * NOTE(review): when the top node was a Gather, father_plan is set to
		 * the removed Gather node, not the new planTree — confirm the walker
		 * relies on this (it recomputes father_plan per level).
		 */
		plans.father_plan = plan;
		vci_plan_tree_walker(plan, vci_update_plan_walker, &plans);

	}

	if (plannedstmt->subplans)
	{
		ListCell   *l;

		foreach(l, plannedstmt->subplans)
		{
			Plan	   *subplan = (Plan *) lfirst(l);

			if (subplan == NULL)
				continue;

			plans.father_plan = subplan;
			if (nodeTag(subplan) == T_Gather || nodeTag(subplan) == T_GatherMerge)
			{
				plans.gather_plan = subplan;
				subplan = subplan->lefttree;
			}
			newsubplans = lappend(newsubplans, subplan);
			vci_plan_tree_walker(subplan, vci_update_plan_walker, &plans);
		}
		plannedstmt->subplans = newsubplans;
	}

}
/*
 * Walker that removes any remaining Gather/GatherMerge nodes below the top
 * and propagates the removed Gather's row estimate / targetlist metadata
 * down to VCI custom plan nodes.
 */
static bool
vci_update_plan_walker(Plan *plan, void *plans)
{
	father_gather_plans *fg_plans = (father_gather_plans *) plans;
	father_gather_plans fg_plans_local;

	if (plan == NULL)
		return false;
	/* Go through the every plan here */
	if (nodeTag(plan) == T_Gather || nodeTag(plan) == T_GatherMerge)
	{
		if (fg_plans->father_plan->lefttree == plan)
		{
			fg_plans->father_plan->lefttree = plan->lefttree;
		}
		else if (fg_plans->father_plan->righttree == plan)
		{
			fg_plans->father_plan->righttree = plan->lefttree;
		}
		else
		{
			/*
			 * Not expected scenario, All other cases should already mark that
			 * VCI is not possible.
			 */
			/*
			 * NOTE(review): per the PostgreSQL error message style guide, the
			 * primary message should be lowercase with no trailing period.
			 */
			elog(ERROR, "The plan must be either left or right child of parent.");
		}

		fg_plans->gather_plan = plan;
	}
	else if (nodeTag(plan) == T_CustomPlanMarkPos && fg_plans->gather_plan)
	{
		/* Inherit the Gather's row estimate; VCI node runs non-parallel */
		plan->plan_rows = fg_plans->gather_plan->plan_rows;
		plan->parallel_aware = 0;
		vci_update_target_list(plan, fg_plans->gather_plan);
	}

	fg_plans_local.gather_plan = fg_plans->gather_plan;
	fg_plans_local.father_plan = plan;

	return vci_plan_tree_walker(plan, vci_update_plan_walker, &fg_plans_local);

}

/*
 * If vci_scan is created based on partial scan, some fields will be updated
 * by the targetlist in gather_plan. This function is used to do this job.
+ * + */ +static List * +vci_update_target_list(Plan *plan, Plan *gather_plan) +{ + ListCell *cell1, + *cell2; + + forboth(cell1, plan->targetlist, cell2, gather_plan->targetlist) + { + TargetEntry *te1 = (TargetEntry *) lfirst(cell1); + TargetEntry *te2 = (TargetEntry *) lfirst(cell2); + + te1->resname = te2->resname; + } + + return plan->targetlist; +} diff --git a/contrib/vci/executor/vci_planner_preanalyze.c b/contrib/vci/executor/vci_planner_preanalyze.c new file mode 100644 index 0000000..6387653 --- /dev/null +++ b/contrib/vci/executor/vci_planner_preanalyze.c @@ -0,0 +1,413 @@ +/*------------------------------------------------------------------------- + * + * vci_planner_preanalyze.c + * Preprocessing for plan rewrite routine + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/executor/vci_planner_preanalyze.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include /* for qsort() */ + +#include "access/htup_details.h" +#include "access/sysattr.h" +#include "access/transam.h" +#include "catalog/pg_aggregate.h" +#include "catalog/pg_am.h" +#include "catalog/pg_namespace.h" /* for PG_PUBLIC_NAMESPACE */ +#include "catalog/pg_proc.h" /* for ProcedureRelationId, Form_pg_proc */ +#include "catalog/pg_type.h" /* for BOOLOID */ +#include "executor/executor.h" +#include "executor/nodeCustom.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/plancat.h" +#include "optimizer/planmain.h" +#include "optimizer/planner.h" +#include "optimizer/restrictinfo.h" +#include "parser/parsetree.h" +#include "utils/fmgroids.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/relcache.h" +#include "utils/snapmgr.h" +#include "utils/snapshot.h" +#include "utils/syscache.h" + +#include "vci.h" 
+ +#include "vci_mem.h" +#include "vci_executor.h" +#include "vci_utils.h" +#include "vci_planner.h" + +static bool preanalyze_plan_tree_mutator(Plan **plan_p, Plan *parent, void *context, int eflags, bool *changed); +static bool collect_data_in_expression(Node *node, void *context); +static bool collect_data_in_initplan(Node *node, void *context); + +static bool isGatherExists; + +/** + * Analysis before plan rewrite + * + * @param[in] target Pointer to PlannedStmt holding the query + * @param[in,out] rp_context Pointer to plan rewrite information + * @param[in] eflags execution flags + * + * @return true when callback function stops cycle, false if cycle is complete + */ +bool +vci_preanalyze_plan_tree(PlannedStmt *target, vci_rewrite_plan_context_t *rp_context, int eflags, bool *isGather) +{ + bool dummy; + int nParamExec; + + /* + * Scans target's plan tree and gathers information. Use + * vci_plannedstmt_tree_mutator () instead of vci_plannedstmt_tree_walker + * () because target cannot be written but eflags information is collected + * for plan nodes. + */ + if (vci_plannedstmt_tree_mutator(target, preanalyze_plan_tree_mutator, vci_register_plan_id, rp_context, eflags, &dummy)) + { + *isGather = isGatherExists; + return true; + } + + nParamExec = list_length(target->paramExecTypes); + for (int i = 0; i < nParamExec; i++) + { + rp_context->param_exec_attr_map[i].num_def_plans = bms_num_members(rp_context->param_exec_attr_map[i].def_plan_nos); + rp_context->param_exec_attr_map[i].num_use_plans = bms_num_members(rp_context->param_exec_attr_map[i].use_plan_nos); + } + *isGather = isGatherExists; + return false; +} + +/** + * Callback function to record Topmost plan node and subplan number + * + * @param[in] plan Topmost plan node + * @param[in] plan_id subplan number + * @param[in,out] context Pointer to plan rewrite information + * + * This function specifies topmostplan for vci_plannedstmt_tree_mutator(). 
 */
void
vci_register_plan_id(Plan *plan, int plan_id, void *context)
{
	vci_rewrite_plan_context_t *rp_context = (vci_rewrite_plan_context_t *) context;

	rp_context->current_plan_id = plan_id;

	rp_context->subplan_attr_map[plan_id].topmostplan = plan;
}

/**
 * Analysis before plan rewrite
 *
 * @param[in] plan_p Pointer to a pointer to plan node
 * @param[in] parent Pointer to plan node that is the parent of *plan_p
 * @param[in,out] context Pointer to plan rewrite information
 * @param[in] eflags execution flags
 * @param[out] changed Set true when plan tree has been rewritten
 *
 * @return true when callback function stops cycle, false if cycle is complete
 *
 * This function is specified as mutator for vci_plannedstmt_tree_mutator().
 * Since the plan tree is not rewritten, nothing is written to *changed.
 */
static bool
preanalyze_plan_tree_mutator(Plan **plan_p, Plan *parent, void *context, int eflags, bool *changed)
{
	vci_rewrite_plan_context_t *rp_context = (vci_rewrite_plan_context_t *) context;
	Plan	   *plan;
	AttrNumber	plan_no;
	bool		saved_forbid_parallel_exec;
	bool		result;

	plan = *plan_p;

	/* Assign a fresh sequential plan number to this node */
	Assert(plan->plan_no == 0);
	plan_no = plan->plan_no = ++rp_context->last_plan_no;

	/* If the capacity of plan_attr_map[] is insufficient, double it */
	vci_expand_plan_attr_map(rp_context);

	rp_context->plan_attr_map[plan_no].preset_eflags = eflags;

	rp_context->current_plan_no = plan_no;

	/*
	 * Investigate plan nodes that prohibit parallel execution Set
	 * rp_context->forbid_parallel_exec to false and scan subplan tree
	 */
	saved_forbid_parallel_exec = rp_context->forbid_parallel_exec;
	rp_context->forbid_parallel_exec = false;

	/* Scan expression tree included in plan and collect data */
	vci_expression_and_initplan_walker(plan, collect_data_in_expression, collect_data_in_initplan, context);

	switch (nodeTag(plan))
	{
		case T_SubqueryScan:	/* Since using VCI custom scan for initplans
								 * slows down the performance, block VCI scan
								 * to be replaced for subquery scan */
			return true;
		case T_ModifyTable:
		case T_TidScan:
		case T_TidRangeScan:
		case T_FunctionScan:
		case T_ValuesScan:
		case T_CteScan:
		case T_ForeignScan:
		case T_CustomScan:
		case T_CustomPlanMarkPos:
		case T_LockRows:
			rp_context->forbid_parallel_exec = true;
			break;
		case T_Gather:
		case T_GatherMerge:

			/*
			 * Verify the targetlist of Gather node and underlying node is
			 * same or not. VCI scan replacement assumes Gather node and
			 * underlying node has same targetlist. But, in some scenarios it
			 * is not the case. So, avoid rewriting VCI plan where Gather node
			 * has different targetlist than underlying node. E.g: SELECT c2,
			 * (select key from testtable1 where key=1 ) FROM testtable2 where
			 * c1 = 1 limit 1.
			 */

			if (list_length(plan->targetlist) != list_length(plan->lefttree->targetlist))
				return true;

			/*
			 * Set the flag to verify the presence of Gather node in current
			 * query plan generated by OSS. If there are no Gather nodes
			 * present, then the step to update the query plan to remove
			 * Gather node can be skipped. This way unnecessary recursive
			 * function calls to remove Gather nodes will be skipped when
			 * there are no Gather plan exists in query plan
			 */

			isGatherExists = true;
			break;
		case T_NestLoop:
			{
				NestLoop   *nl;
				ListCell   *lc;

				nl = (NestLoop *) plan;

				/* Classify every NestLoop parameter this node defines */
				foreach(lc, nl->nestParams)
				{
					NestLoopParam *nlp = (NestLoopParam *) lfirst(lc);
					int			paramid = nlp->paramno;

					/* A Param may not switch category (initPlan/SubPlan vs NestLoop) */
					if ((rp_context->param_exec_attr_map[paramid].type != VCI_PARAM_EXEC_UNKNOWN) &&
						(rp_context->param_exec_attr_map[paramid].type != VCI_PARAM_EXEC_NESTLOOP))
						return true;

					rp_context->param_exec_attr_map[paramid].type = VCI_PARAM_EXEC_NESTLOOP;
					rp_context->param_exec_attr_map[paramid].def_plan_nos =
						bms_add_member(rp_context->param_exec_attr_map[paramid].def_plan_nos, plan_no);
					rp_context->plan_attr_map[plan_no].def_param_ids =
						bms_add_member(rp_context->plan_attr_map[plan_no].def_param_ids, paramid);

					/* Defining and using the same Param in one node is not supported */
					if (bms_is_member(paramid, rp_context->plan_attr_map[plan_no].use_param_ids))
						return true;
				}
			}
			break;

		default:
			break;
	}

	rp_context->current_plan_no = 0;

	/* Recurse into child plan nodes */
	result = vci_plan_tree_mutator(plan_p, parent, preanalyze_plan_tree_mutator, context, eflags, changed);

	/* Record this subtree's compatibility, then restore the caller's flag */
	rp_context->plan_attr_map[plan_no].plan_compat = rp_context->forbid_parallel_exec ? VCI_PLAN_COMPAT_FORBID_TYPE : VCI_PLAN_COMPAT_OK;
	rp_context->forbid_parallel_exec |= saved_forbid_parallel_exec;

	return result;
}

/**
 * Search expression tree and collect data related to PARAM_EXEC type Param
 * and subquery calls.
+ */ +static bool +collect_data_in_expression(Node *node, void *context) +{ + vci_rewrite_plan_context_t *rp_context = (vci_rewrite_plan_context_t *) context; + AttrNumber plan_no; + + if (node == NULL) + return false; + + plan_no = rp_context->current_plan_no; + + switch (nodeTag(node)) + { + case T_SubPlan: + { + SubPlan *subplan = (SubPlan *) node; + ListCell *lc; + + if ((rp_context->subplan_attr_map[subplan->plan_id].type != VCI_SUBPLAN_UNKNOWN) && + (rp_context->subplan_attr_map[subplan->plan_id].type != VCI_SUBPLAN_SUBPLAN)) + return true; + + rp_context->subplan_attr_map[subplan->plan_id].type = VCI_SUBPLAN_SUBPLAN; + rp_context->subplan_attr_map[rp_context->current_plan_id].plan_ids = + bms_add_member(rp_context->subplan_attr_map[rp_context->current_plan_id].plan_ids, subplan->plan_id); + + foreach(lc, subplan->parParam) + { + int paramid = lfirst_int(lc); + + if ((rp_context->param_exec_attr_map[paramid].type != VCI_PARAM_EXEC_UNKNOWN) && + (rp_context->param_exec_attr_map[paramid].type != VCI_PARAM_EXEC_SUBPLAN)) + return true; + + rp_context->param_exec_attr_map[paramid].type = VCI_PARAM_EXEC_SUBPLAN; + rp_context->param_exec_attr_map[paramid].def_plan_nos = + bms_add_member(rp_context->param_exec_attr_map[paramid].def_plan_nos, plan_no); + rp_context->plan_attr_map[plan_no].def_param_ids = + bms_add_member(rp_context->plan_attr_map[plan_no].def_param_ids, paramid); + } + + return expression_tree_walker((Node *) subplan->args, collect_data_in_expression, context); + } + + case T_Param: + { + Param *param = (Param *) node; + + if (param->paramkind == PARAM_EXEC) + { + int paramid = param->paramid; + + rp_context->param_exec_attr_map[paramid].use_plan_nos = + bms_add_member(rp_context->param_exec_attr_map[paramid].use_plan_nos, plan_no); + rp_context->plan_attr_map[plan_no].use_param_ids = + bms_add_member(rp_context->plan_attr_map[plan_no].use_param_ids, paramid); + + if (rp_context->param_exec_attr_map[paramid].type == VCI_PARAM_EXEC_INITPLAN) + { + 
rp_context->param_exec_attr_map[paramid].def_plan_nos = + bms_add_member(rp_context->param_exec_attr_map[paramid].def_plan_nos, plan_no); + rp_context->plan_attr_map[plan_no].def_param_ids = + bms_add_member(rp_context->plan_attr_map[plan_no].def_param_ids, paramid); + } + } + } + return false; + + default: + break; + } + + return expression_tree_walker(node, collect_data_in_expression, context); +} + +/** + * Search for initPlan and analyze SubPlan + */ +static bool +collect_data_in_initplan(Node *node, void *context) +{ + vci_rewrite_plan_context_t *rp_context = (vci_rewrite_plan_context_t *) context; + + if (node == NULL) + return false; + + if (IsA(node, SubPlan)) + { + SubPlan *subplan = (SubPlan *) node; + ListCell *lc; + + if ((rp_context->subplan_attr_map[subplan->plan_id].type != VCI_SUBPLAN_UNKNOWN) && + (rp_context->subplan_attr_map[subplan->plan_id].type != VCI_SUBPLAN_INITPLAN)) + return true; + + rp_context->subplan_attr_map[subplan->plan_id].type = VCI_SUBPLAN_INITPLAN; + rp_context->subplan_attr_map[rp_context->current_plan_id].plan_ids = + bms_add_member(rp_context->subplan_attr_map[rp_context->current_plan_id].plan_ids, subplan->plan_id); + + foreach(lc, subplan->setParam) + { + int paramid = lfirst_int(lc); + + if ((rp_context->param_exec_attr_map[paramid].type != VCI_PARAM_EXEC_UNKNOWN) && + (rp_context->param_exec_attr_map[paramid].type != VCI_PARAM_EXEC_INITPLAN)) + return true; + + rp_context->param_exec_attr_map[paramid].type = VCI_PARAM_EXEC_INITPLAN; + + rp_context->param_exec_attr_map[paramid].plan_id = subplan->plan_id; + } + + return false; + } + + return expression_tree_walker(node, collect_data_in_initplan, context); +} + +/** + * Expand array of analysis data for each plan node as necessary + */ +void +vci_expand_plan_attr_map(vci_rewrite_plan_context_t *rp_context) +{ + if (rp_context->max_plan_attrs <= rp_context->last_plan_no) + { + int old_max_plan_attrs = rp_context->max_plan_attrs; + vci_plan_attr_t *old_plan_attr_map = 
rp_context->plan_attr_map; + + rp_context->max_plan_attrs *= 2; + rp_context->plan_attr_map = palloc0_array(vci_plan_attr_t, rp_context->max_plan_attrs); + + for (int i = 0; i < old_max_plan_attrs; i++) + rp_context->plan_attr_map[i] = old_plan_attr_map[i]; + + pfree(old_plan_attr_map); + } +} + +vci_inner_plan_type_t +vci_get_inner_plan_type(vci_rewrite_plan_context_t *context, const Plan *plan) +{ + Assert(plan->plan_no > 0); + + return context->plan_attr_map[plan->plan_no].plan_type; +} + +AttrNumber +vci_get_inner_scan_plan_no(vci_rewrite_plan_context_t *context, const Plan *plan) +{ + Assert(plan->plan_no > 0); + + return context->plan_attr_map[plan->plan_no].scan_plan_no; +} + +void +vci_set_inner_plan_type_and_scan_plan_no(vci_rewrite_plan_context_t *context, Plan *plan, vci_inner_plan_type_t plan_type, AttrNumber scan_plan_no) +{ + Assert(plan->plan_no > 0); + + context->plan_attr_map[plan->plan_no].plan_type = plan_type; + context->plan_attr_map[plan->plan_no].scan_plan_no = scan_plan_no; +} diff --git a/contrib/vci/executor/vci_scan.c b/contrib/vci/executor/vci_scan.c new file mode 100644 index 0000000..008fc68 --- /dev/null +++ b/contrib/vci/executor/vci_scan.c @@ -0,0 +1,631 @@ +/*------------------------------------------------------------------------- + * + * vci_scan.c + * Routines to handle VCI Scan nodes + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/executor/vci_scan.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/relscan.h" +#include "commands/explain.h" +#include "commands/explain_format.h" +#include "executor/executor.h" +#include "executor/nodeCustom.h" +#include "executor/nodeSubplan.h" +#include "miscadmin.h" +#include "optimizer/cost.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/plancat.h" +#include "optimizer/planmain.h" +#include "optimizer/planner.h" 
+#include "optimizer/restrictinfo.h" +#include "parser/parsetree.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" + +#include "vci.h" +#include "vci_executor.h" +#include "vci_utils.h" +#include "vci_fetch_row_store.h" + +static Node *vci_scan_CreateCustomScanState(CustomScan *cscan); + +/* +* Declarations of Custom Scan Methods callbacks +*/ +static void vci_scan_BeginCustomPlan(CustomScanState *node, EState *estate, int eflags); + +static void vci_scan_BeginCustomPlan_postprocess_enabling_vp(VciScan *scan, VciScanState *scanstate); +static TupleTableSlot *vci_scan_ExecCustomPlan(CustomScanState *node); +static void vci_scan_EndCustomPlan(CustomScanState *node); + +static void vci_scan_ReScanCustomPlan(CustomScanState *node); +static void vci_scan_MarkPosCustomPlan(CustomScanState *cpstate); +static void vci_scan_RestrPosCustomPlan(CustomScanState *cpstate); + +static void vci_scan_ExplainCustomPlanTargetRel(CustomScanState *node, ExplainState *es); +static CustomScan *vci_scan_CopyCustomPlan(const CustomScan *_from); + +static int exec_proc_scan_vector(VciScanState *scanstate); +static TupleTableSlot *exec_custom_plan_enabling_vp(VciScanState *scanstate); + +/*****************************************************************************/ +/* Column-store (basic) */ +/*****************************************************************************/ + +static Node * +vci_scan_CreateCustomScanState(CustomScan *cscan) +{ + VciScan *vscan; + VciScanState *vss = palloc0_object(VciScanState); + + vscan = (VciScan *) cscan; + + vss->vci.css.ss.ps.type = T_CustomScanState; + vss->vci.css.ss.ps.plan = (Plan *) vscan; + vss->vci.css.flags = cscan->flags; + + switch (vscan->scan_mode) + { + case VCI_SCAN_MODE_COLUMN_STORE: + vss->vci.css.methods = &vci_scan_exec_column_store_methods; + break; + + default: + Assert(0); + break; + } + return (Node *) vss; +} + +static void +vci_scan_BeginCustomPlan(CustomScanState 
*node, EState *estate, int eflags) +{ + VciScanState *scanstate = (VciScanState *) node; + VciScan *scan = (VciScan *) node->ss.ps.plan; + Relation currentRelation; + TableScanDesc currentScanDesc; + vci_initexpr_t initexpr = VCI_INIT_EXPR_NONE; + TupleDesc scanDesc; + + Assert(scanstate->vci.css.ss.ps.type == T_CustomScanState); + + if (ScanDirectionIsBackward(estate->es_direction)) + elog(ERROR, "VCI Scan does not support backward scan"); + + switch (scan->scan_mode) + { + case VCI_SCAN_MODE_COLUMN_STORE: + initexpr = VCI_INIT_EXPR_FETCHING_COLUMN_STORE; + break; + + default: + Assert(0); + break; + } + + /* + * create state structure + */ + scanstate->is_subextent_grain = scan->is_subextent_grain; + scanstate->vci.css.ss.ps.state = estate; + + /* create expression context for node */ + ExecAssignExprContext(estate, &scanstate->vci.css.ss.ps); + + /* initialize child expressions */ + scanstate->vci.css.ss.ps.qual = + VciExecInitQual(scan->vci.cscan.scan.plan.qual, &scanstate->vci.css.ss.ps, + initexpr); + + if (scan->scan_mode == VCI_SCAN_MODE_COLUMN_STORE) + { + vci_create_one_fetch_context_for_fetching_column_store(scanstate, scanstate->vci.css.ss.ps.ps_ExprContext); + } + + switch (scan->scan_mode) + { + case VCI_SCAN_MODE_COLUMN_STORE: + + /* + * get the relation object id from the relid'th entry in the range + * table, open that relation and acquire appropriate lock on it. 
+ */ + currentRelation = ExecOpenScanRelation(estate, scan->scanrelid, eflags); + + /* initialize a heapscan */ + currentScanDesc = table_beginscan(currentRelation, + estate->es_snapshot, + 0, + NULL); + + scanstate->vci.css.ss.ss_currentRelation = currentRelation; + scanstate->vci.css.ss.ss_currentScanDesc = currentScanDesc; + + /* and report the scan tuple slot's rowtype */ + scanDesc = RelationGetDescr(currentRelation); + break; + + default: + outerPlanState(scanstate) = ExecInitNode(outerPlan(scan), estate, eflags); + scanDesc = ExecGetResultType(outerPlanState(scanstate)); + break; + } + + /* tuple table initialization */ + ExecInitScanTupleSlot(estate, &scanstate->vci.css.ss, scanDesc, &TTSOpsMinimalTuple); + ExecInitResultTupleSlotTL(&scanstate->vci.css.ss.ps, &TTSOpsMinimalTuple); + + /* ExecAssignScanProjectionInfo() ???? */ + if (scan->scan_mode == VCI_SCAN_MODE_COLUMN_STORE) + { + vci_scan_BeginCustomPlan_postprocess_enabling_vp(scan, scanstate); + } +} + +static void +vci_scan_BeginCustomPlan_postprocess_enabling_vp(VciScan *scan, VciScanState *scanstate) +{ + int i, + max_targetlist; + uint16 *skip_list; + ListCell *l; + + max_targetlist = list_length(scanstate->vci.css.ss.ps.plan->targetlist); + + skip_list = vci_CSGetSkipAddrFromVirtualTuples(scanstate->vector_set); + + if (scanstate->vci.css.ss.ps.qual) + { + + scanstate->vp_qual = VciBuildVectorProcessing(scanstate->vci.css.ss.ps.qual->expr, + (PlanState *) scanstate, + scanstate->vci.css.ss.ps.ps_ExprContext, + skip_list); + } + scanstate->result_values = palloc_array(Datum *, max_targetlist); + scanstate->result_isnull = palloc_array(bool *, max_targetlist); + scanstate->vp_targets = palloc0_array(VciVPContext *, max_targetlist); + + i = 0; + foreach(l, scanstate->vci.css.ss.ps.plan->targetlist) + { + TargetEntry *tle = castNode(TargetEntry, lfirst(l)); + AttrNumber resind = tle->resno - 1; + + if (tle->expr && IsA(tle->expr, Var)) + { + Var *var = (Var *) tle->expr; + int index; + + 
Assert(var->varno == scan->scanrelid); + + index = scanstate->attr_map[var->varattno] - 1; + + Assert(index >= 0); + Assert(index < scanstate->vector_set->num_columns); + + scanstate->result_values[resind] = vci_CSGetValueAddrFromVirtualTuplesColumnwise(scanstate->vector_set, index); + scanstate->result_isnull[resind] = vci_CSGetIsNullAddrFromVirtualTuplesColumnwise(scanstate->vector_set, index); + } + else + { + scanstate->vp_targets[i] = + VciBuildVectorProcessing((Expr *) tle->expr, + (PlanState *) scanstate, + scanstate->vci.css.ss.ps.ps_ExprContext, + skip_list); + + scanstate->result_values[resind] = scanstate->vp_targets[i]->resultValue; + scanstate->result_isnull[resind] = scanstate->vp_targets[i]->resultIsNull; + + i++; + } + } + scanstate->num_vp_targets = i; +} + +static TupleTableSlot * +vci_scan_ExecCustomPlan(CustomScanState *cstate) +{ + VciScanState *scanstate = (VciScanState *) cstate; + + Assert(scanstate->vci.css.ss.ps.type == T_CustomScanState); + + return VciExecProcScanTuple(scanstate); +} + +/** + * Processing equivalent to ExecProcNode() for VCI Scan. + * + * When calling the ExecProcNode of a lower VCI Scan from an upper VCI plan, + * overhead occurs if going through the CustomPlanState's ExecCustomPlan. + * This is a special version to avoid that. + * + * @param[in] scanstate VCI Scan state + * @return output tuple + * + * @todo This can be abolished. + */ +TupleTableSlot * +VciExecProcScanTuple(VciScanState *scanstate) +{ + TupleTableSlot *result; + PlanState *node; + bool use_instrumentation; + + node = &scanstate->vci.css.ss.ps; + + /* + * XXX - This is a workaround to make sure that the plan node we are + * reusing had not already started timing. This is needed to prevent + * "ERROR: InstrStartNode called twice in a row", which can happen for + * EXPLAIN ANALYZE SELECT ... 
+ */ + use_instrumentation = node->instrument && INSTR_TIME_IS_ZERO(node->instrument->starttime); + + CHECK_FOR_INTERRUPTS(); + + if (node->chgParam != NULL) /* something changed */ + ExecReScan(node); /* let ReScan handle this */ + + if (use_instrumentation) + InstrStartNode(node->instrument); + + result = exec_custom_plan_enabling_vp(scanstate); + + if (use_instrumentation) + InstrStopNode(node->instrument, TupIsNull(result) ? 0.0 : 1.0); + + return result; +} + +static TupleTableSlot * +exec_custom_plan_enabling_vp(VciScanState *scanstate) +{ + ExprContext *econtext; + TupleTableSlot *outputslot; + TupleDesc tupdesc; + int slot_index; + + econtext = scanstate->vci.css.ss.ps.ps_ExprContext; + + if (!scanstate->first_fetch || (scanstate->pos.num_fetched_rows <= scanstate->pos.current_row)) + { + int result; + + ResetExprContext(econtext); + + do + { + result = exec_proc_scan_vector(scanstate); + + if (result == -1) + { + ExecClearTuple(scanstate->vci.css.ss.ss_ScanTupleSlot); + + return NULL; + } + } while (result == 0); + } + + outputslot = scanstate->vci.css.ss.ps.ps_ResultTupleSlot; + tupdesc = outputslot->tts_tupleDescriptor; + slot_index = scanstate->pos.current_row; + + ExecClearTuple(outputslot); + + for (int i = 0; i < tupdesc->natts; i++) + { + outputslot->tts_values[i] = (scanstate->result_values[i])[slot_index]; + outputslot->tts_isnull[i] = (scanstate->result_isnull[i])[slot_index]; + } + + ExecStoreVirtualTuple(outputslot); + + vci_step_next_tuple_from_column_store(scanstate); + + return outputslot; +} + +int +VciExecProcScanVector(VciScanState *scanstate) +{ + int result; + PlanState *node; + + node = &scanstate->vci.css.ss.ps; + + CHECK_FOR_INTERRUPTS(); + + if (node->chgParam != NULL) /* something changed */ + ExecReScan(node); /* let ReScan handle this */ + + if (node->instrument) + InstrStartNode(node->instrument); + + do + { + result = exec_proc_scan_vector(scanstate); + + if (result == -1) + { + 
ExecClearTuple(scanstate->vci.css.ss.ss_ScanTupleSlot); + + result = 0; + break; + } + } while (result == 0); + + if (node->instrument) + InstrStopNode(node->instrument, 1.0 * result); + + return result; +} + +static int +exec_proc_scan_vector(VciScanState *scanstate) +{ + int max_slots; + int num_slots = 0; + int slot_index; + int check_slot_index; + ExprContext *econtext; + ExprState *qual; + TupleTableSlot *old_tts; + MemoryContext oldContext; + uint16 *skip_list; + + econtext = scanstate->vci.css.ss.ps.ps_ExprContext; + qual = scanstate->vci.css.ss.ps.qual; + + CHECK_FOR_INTERRUPTS(); + + ResetExprContext(econtext); + + if (!vci_fill_vector_set_from_column_store(scanstate)) + return -1; + + old_tts = econtext->ecxt_scantuple; + econtext->ecxt_scantuple = NULL; /* safety */ + max_slots = scanstate->pos.num_fetched_rows; + + Assert(max_slots > 0); + + oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); + + num_slots = 0; + skip_list = vci_CSGetSkipFromVirtualTuples(scanstate->vector_set); + slot_index = skip_list[0]; + check_slot_index = 0; + + if (qual) + { + VciVPContext *vpcontext = scanstate->vp_qual; + + VciExecEvalVectorProcessing(vpcontext, econtext, max_slots); + + for (; slot_index < max_slots; slot_index += skip_list[slot_index + 1] + 1) + { + if (!vpcontext->resultIsNull[slot_index] && DatumGetBool(vpcontext->resultValue[slot_index])) + { + check_slot_index = slot_index + 1; + num_slots++; + } + else + { + InstrCountFiltered1(&scanstate->vci.css.ss, 1); + skip_list[check_slot_index] += skip_list[slot_index + 1] + 1; + } + } + + scanstate->pos.current_row = skip_list[0]; + + VciExecTargetListWithVectorProcessing(scanstate, econtext, max_slots); + } + else + { + VciExecTargetListWithVectorProcessing(scanstate, econtext, max_slots); + + for (; slot_index < max_slots; slot_index += skip_list[slot_index + 1] + 1) + num_slots++; + } + + MemoryContextSwitchTo(oldContext); + + econtext->ecxt_scantuple = old_tts; + + if (num_slots == 0) + { + 
 scanstate->pos.current_row = scanstate->pos.num_fetched_rows; + return 0; + } + + return max_slots; +} + +static void +vci_scan_EndCustomPlan(CustomScanState *node) +{ + VciScan *scan; + VciScanState *scanstate = (VciScanState *) node; + TableScanDesc scanDesc; + + Assert(scanstate->vci.css.ss.ps.type == T_CustomScanState); + + scan = (VciScan *) scanstate->vci.css.ss.ps.plan; + + scanDesc = scanstate->vci.css.ss.ss_currentScanDesc; + + switch (scan->scan_mode) + { + case VCI_SCAN_MODE_COLUMN_STORE: + vci_destroy_one_fetch_context_for_fetching_column_store(scanstate); + + /* close the heap scan */ + table_endscan(scanDesc); + + break; + + default: + /* LCOV_EXCL_START */ + elog(PANIC, "Should not reach here"); + /* LCOV_EXCL_STOP */ + break; + } +} + +static void +vci_scan_ReScanCustomPlan(CustomScanState *node) +{ + VciScanState *scanstate; + + scanstate = (VciScanState *) node; + + Assert(scanstate->vci.css.ss.ps.type == T_CustomScanState); + /* VCI Scan is never executed inside an EvalPlanQual recheck */ + Assert(scanstate->vci.css.ss.ps.state->es_epq_active == NULL); + + scanstate->first_fetch = false; +} + +static void +vci_scan_MarkPosCustomPlan(CustomScanState *node) +{ + VciScanState *scanstate = (VciScanState *) node; + + Assert(scanstate->vci.css.ss.ps.type == T_CustomScanState); + + vci_mark_pos_vector_set_from_column_store(scanstate); +} + +static void +vci_scan_RestrPosCustomPlan(CustomScanState *node) +{ + VciScanState *scanstate = (VciScanState *) node; + + Assert(scanstate->vci.css.ss.ps.type == T_CustomScanState); + + ExecClearTuple(scanstate->vci.css.ss.ss_ScanTupleSlot); + + vci_restr_pos_vector_set_from_column_store(scanstate); +} + +static void +vci_scan_ExplainCustomPlanTargetRel(CustomScanState *node, ExplainState *es) +{ + VciScanState *scanstate; + VciScan *scan; + Index scanrelid; + char *refname; + char *objectname = NULL; + char *namespace = NULL; + const char *indexname = NULL; + RangeTblEntry *rte; + + scanstate = (VciScanState 
*) node; + Assert(scanstate->vci.css.ss.ps.type == T_CustomScanState); + scan = (VciScan *) scanstate->vci.css.ss.ps.plan; + scanrelid = scan->scanrelid; + + rte = rt_fetch(scanrelid, es->rtable); + Assert(rte->rtekind == RTE_RELATION); + + refname = (char *) list_nth(es->rtable_names, scanrelid - 1); + if (refname == NULL) + refname = rte->eref->aliasname; + objectname = get_rel_name(rte->relid); + if (es->verbose) + namespace = get_namespace_name(get_rel_namespace(rte->relid)); + + indexname = get_rel_name(scan->indexoid); + if (indexname == NULL) + elog(ERROR, "cache lookup failed for index %u", scan->indexoid); + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + appendStringInfo(es->str, " using %s on", + quote_identifier(indexname)); + + if (namespace != NULL) + appendStringInfo(es->str, " %s.%s", quote_identifier(namespace), + quote_identifier(objectname)); + else if (objectname != NULL) + appendStringInfo(es->str, " %s", quote_identifier(objectname)); + if (objectname == NULL || strcmp(refname, objectname) != 0) + appendStringInfo(es->str, " %s", quote_identifier(refname)); + } + else + { + ExplainPropertyText("Index Name", indexname, es); + if (objectname != NULL) + ExplainPropertyText("Relation Name", objectname, es); + if (namespace != NULL) + ExplainPropertyText("Schema", namespace, es); + ExplainPropertyText("Alias", refname, es); + } +} + +static CustomScan * +vci_scan_CopyCustomPlan(const CustomScan *_from) +{ + const VciScan *from = (const VciScan *) _from; + VciScan *newnode; + + newnode = palloc0_object(VciScan); + + vci_copy_plan(&newnode->vci, &from->vci); + + newnode->scan_mode = from->scan_mode; + newnode->scanrelid = from->scanrelid; + newnode->reloid = from->reloid; + newnode->indexoid = from->indexoid; + newnode->attr_used = bms_copy(from->attr_used); + newnode->num_attr_used = from->num_attr_used; + newnode->is_all_simple_vars = from->is_all_simple_vars; + newnode->estimate_tuples = from->estimate_tuples; + newnode->is_subextent_grain = 
from->is_subextent_grain; + newnode->index_ph_id = from->index_ph_id; + newnode->fetch_ph_id = from->fetch_ph_id; + + ((Node *) newnode)->type = nodeTag((Node *) from); + + return &newnode->vci.cscan; +} + +/*****************************************************************************/ +/* Callback */ +/*****************************************************************************/ + +CustomScanMethods vci_scan_scan_methods = { + "VCI Scan", + vci_scan_CreateCustomScanState, + vci_scan_CopyCustomPlan +}; + +CustomExecMethods vci_scan_exec_column_store_methods = { + "VCI Scan", + vci_scan_BeginCustomPlan, + vci_scan_ExecCustomPlan, + vci_scan_EndCustomPlan, + vci_scan_ReScanCustomPlan, + vci_scan_MarkPosCustomPlan, + vci_scan_RestrPosCustomPlan, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + vci_scan_ExplainCustomPlanTargetRel +}; diff --git a/contrib/vci/executor/vci_sort.c b/contrib/vci/executor/vci_sort.c new file mode 100644 index 0000000..7daa3a4 --- /dev/null +++ b/contrib/vci/executor/vci_sort.c @@ -0,0 +1,413 @@ +/*------------------------------------------------------------------------- + * + * vci_sort.c + * Routines to handle VCI Sort nodes + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/executor/vci_sort.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "commands/explain.h" +#include "commands/explain_format.h" +#include "executor/execdebug.h" +#include "executor/executor.h" +#include "executor/nodeCustom.h" +#include "miscadmin.h" +#include "optimizer/cost.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/plancat.h" +#include "optimizer/planmain.h" +#include "optimizer/planner.h" +#include "optimizer/restrictinfo.h" +#include "utils/tuplesort.h" + +#include "vci.h" +#include "vci_executor.h" +#include "vci_mem.h" + +/* ---------------- + * VCI Sort information + * 
---------------- + */ +static Node * +vci_sort_CreateCustomScanState(CustomScan *cs) +{ + VciSort *vsort; + VciSortState *vss = palloc0_object(VciSortState); + + vsort = (VciSort *) cs; + + vss->vci.css.ss.ps.type = T_CustomScanState; + vss->vci.css.ss.ps.plan = (Plan *) vsort; + vss->vci.css.flags = cs->flags; + vss->vci.css.methods = &vci_sort_exec_methods; + + return (Node *) vss; +} + +static TupleTableSlot * +vci_sort_ExecCustomPlan(CustomScanState *node) +{ + EState *estate; + ScanDirection dir; + Tuplesortstate *tuplesortstate; + TupleTableSlot *slot; + VciSortState *sortstate; + + sortstate = (VciSortState *) node; + + SO1_printf("ExecCustomSort: %s\n", + "entering routine"); + + estate = sortstate->vci.css.ss.ps.state; + dir = estate->es_direction; + tuplesortstate = (Tuplesortstate *) sortstate->tuplesortstate; + + if (!sortstate->sort_Done) + { + PlanState *outerNode; + + SO1_printf("ExecCustomSort: %s\n", + "custom sorting subplan"); + + SO1_printf("ExecCustomSort: %s\n", + "calling tuplesort_begin"); + + outerNode = outerPlanState(node); + + tuplesortstate = vci_sort_exec_top_half(sortstate); + + for (;;) + { + VciScanState *scanstate = (VciScanState *) outerNode; + Assert(scanstate->vci.css.ss.ps.type == T_CustomScanState); + slot = VciExecProcScanTuple(scanstate); + + if (TupIsNull(slot)) + break; + + tuplesort_puttupleslot(tuplesortstate, slot); + } + + vci_sort_perform_sort(sortstate); + + sortstate->sort_Done = true; + sortstate->bounded_Done = sortstate->bounded; + sortstate->bound_Done = sortstate->bound; + + SO1_printf("ExecCustomSort: %s\n", "sorting done"); + } + + SO1_printf("ExecCustomSort: %s\n", + "retrieving tuple from tuplesort"); + + slot = sortstate->vci.css.ss.ps.ps_ResultTupleSlot; + + tuplesort_gettupleslot(tuplesortstate, + ScanDirectionIsForward(dir), false, + slot, NULL); + return slot; +} + +Tuplesortstate * +vci_sort_exec_top_half(VciSortState *sortstate) +{ + EState *estate; + Tuplesortstate *tuplesortstate; + VciSort 
*plannode = (VciSort *) sortstate->vci.css.ss.ps.plan;
	PlanState  *outerNode;
	TupleDesc	tupDesc;
	int			tuplesortopts = TUPLESORT_NONE;

	estate = sortstate->vci.css.ss.ps.state;
	/* remember caller's scan direction; restored in vci_sort_perform_sort() */
	sortstate->saved_dir = estate->es_direction;

	/*
	 * NOTE(review): this value is overwritten unconditionally by the
	 * tuplesort_begin_heap() call below, so this read looks like a dead
	 * store — confirm and consider removing.
	 */
	tuplesortstate = (Tuplesortstate *) sortstate->tuplesortstate;

	/* the sorter is always loaded scanning the subplan forward */
	estate->es_direction = ForwardScanDirection;

	outerNode = outerPlanState(sortstate);
	tupDesc = ExecGetResultType(outerNode);

	if (sortstate->randomAccess)
		tuplesortopts |= TUPLESORT_RANDOMACCESS;
	if (sortstate->bounded)
		tuplesortopts |= TUPLESORT_ALLOWBOUNDED;

	tuplesortstate = tuplesort_begin_heap(tupDesc,
										  plannode->numCols,
										  plannode->sortColIdx,
										  plannode->sortOperators,
										  plannode->collations,
										  plannode->nullsFirst,
										  work_mem,
										  NULL,
										  tuplesortopts);

	if (sortstate->bounded)
		tuplesort_set_bound(tuplesortstate, sortstate->bound);

	sortstate->tuplesortstate = (void *) tuplesortstate;

	return tuplesortstate;
}

/*
 * vci_sort_perform_sort
 *		Finish loading and perform the actual sort, then restore the scan
 *		direction that was saved when the sorter was initialized above.
 */
void
vci_sort_perform_sort(VciSortState *sortstate)
{
	EState	   *estate;
	Tuplesortstate *tuplesortstate;

	estate = sortstate->vci.css.ss.ps.state;
	tuplesortstate = (Tuplesortstate *) sortstate->tuplesortstate;

	tuplesort_performsort(tuplesortstate);

	/* undo the forced ForwardScanDirection used while loading */
	estate->es_direction = sortstate->saved_dir;
}

/*
 * vci_sort_BeginCustomPlan
 *		CustomScan Begin callback: set up VciSortState and the child node.
 *		Mirrors ExecInitSort for the built-in Sort node.
 */
static void
vci_sort_BeginCustomPlan(CustomScanState *node, EState *estate, int eflags)
{
	VciSort    *sort;
	VciSortState *sortstate;

	SO1_printf("vci_sort_BeginCustomPlan: %s\n",
			   "initializing custom sort node");

	sort = (VciSort *) node->ss.ps.plan;

	/*
	 * create state structure
	 */
	sortstate = (VciSortState *) node;

	sortstate->vci.css.ss.ps.state = estate;

	/* random access is needed iff the caller may rewind/back up/mark */
	sortstate->randomAccess = (eflags & (EXEC_FLAG_REWIND |
										 EXEC_FLAG_BACKWARD |
										 EXEC_FLAG_MARK)) != 0;

	sortstate->bounded = false;
	sortstate->sort_Done = false;
	sortstate->tuplesortstate = NULL;

	/*
	 * initialize child nodes
	 *
	 * We shield the child node from the need to support REWIND, BACKWARD, or
	 * MARK/RESTORE.
	 */
	eflags &= ~(EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK);

	outerPlanState(sortstate) = ExecInitNode(outerPlan(sort), estate, eflags);

	/*
	 * Initialize scan slot and type.
	 */
	ExecCreateScanSlotFromOuterPlan(estate, &sortstate->vci.css.ss, &TTSOpsVirtual);

	/*
	 * Initialize return slot and type. No need to initialize projection info
	 * because this node doesn't do projections.
	 */
	ExecInitResultTupleSlotTL(&sortstate->vci.css.ss.ps, &TTSOpsMinimalTuple);
	sortstate->vci.css.ss.ps.ps_ProjInfo = NULL;

	SO1_printf("vci_sort_BeginCustomPlan: %s\n",
			   "sort node initialized");
}

/*
 * vci_sort_EndCustomPlan
 *		CustomScan End callback: release the tuplesort and shut down the
 *		child node.
 */
static void
vci_sort_EndCustomPlan(CustomScanState *node)
{
	VciSortState *sortstate;

	sortstate = (VciSortState *) node;

	SO1_printf("ExecEndSort: %s\n",
			   "shutting down custom sort node");

	ExecClearTuple(sortstate->vci.css.ss.ss_ScanTupleSlot);
	ExecClearTuple(sortstate->vci.css.ss.ps.ps_ResultTupleSlot);

	if (sortstate->tuplesortstate != NULL)
		tuplesort_end((Tuplesortstate *) sortstate->tuplesortstate);

	sortstate->tuplesortstate = NULL;

	ExecEndNode(outerPlanState(sortstate));

	SO1_printf("ExecEndSort: %s\n",
			   "VCI Sort node shutdown");
}

/*
 * vci_sort_ReScanCustomPlan
 *		CustomScan ReScan callback.  If the sort parameters changed, or the
 *		sorter was not built for random access, throw the sort away and
 *		re-sort on next fetch; otherwise just rewind the existing sorter.
 */
static void
vci_sort_ReScanCustomPlan(CustomScanState *node)
{
	VciSortState *sortstate;

	sortstate = (VciSortState *) node;

	/* nothing to reset if the sort never ran */
	if (!sortstate->sort_Done)
		return;

	ExecClearTuple(sortstate->vci.css.ss.ps.ps_ResultTupleSlot);

	if (sortstate->vci.css.ss.ps.lefttree->chgParam != NULL ||
		sortstate->bounded != sortstate->bounded_Done ||
		sortstate->bound != sortstate->bound_Done ||
		!sortstate->randomAccess)
	{
		sortstate->sort_Done = false;
		tuplesort_end((Tuplesortstate *) sortstate->tuplesortstate);
		sortstate->tuplesortstate = NULL;

		/*
		 * If chgParam of subnode is not null, the subplan will be re-scanned
		 * by the first ExecProcNode, so only re-scan it explicitly here when
		 * chgParam is NULL.
		 */
		if (sortstate->vci.css.ss.ps.lefttree->chgParam == NULL)
			ExecReScan(sortstate->vci.css.ss.ps.lefttree);
	}
	else
		tuplesort_rescan((Tuplesortstate *) sortstate->tuplesortstate);
}

/* LCOV_EXCL_START */

static void
vci_sort_MarkPosCustomPlan(CustomScanState *node)
{
	/* never reached: the planner does not put VCI Sort under MarkPos users */
	elog(PANIC, "VCI Sort does not support MarkPosCustomPlan call convention");
}

static void
vci_sort_RestrPosCustomPlan(CustomScanState *node)
{
	elog(PANIC, "VCI Sort does not support RestrPosCustomPlan call convention");
}

/* LCOV_EXCL_STOP */

/*
 * vci_sort_ExplainCustomPlan
 *		EXPLAIN callback: print the sort keys and, under ANALYZE, the
 *		tuplesort method/space statistics (like show_sort_info()).
 */
static void
vci_sort_ExplainCustomPlan(CustomScanState *csstate,
						   List *ancestors,
						   ExplainState *es)
{
	VciSortState *sortstate = (VciSortState *) csstate;
	VciSort    *sort = (VciSort *) csstate->ss.ps.plan;

	ExplainPropertySortGroupKeys(&csstate->ss.ps, "Sort Key",
								 sort->numCols, sort->sortColIdx,
								 ancestors, es);

	if (es->analyze && sortstate->sort_Done &&
		sortstate->tuplesortstate != NULL)
	{
		Tuplesortstate *state = (Tuplesortstate *) sortstate->tuplesortstate;
		TuplesortInstrumentation stats;
		const char *sortMethod;
		const char *spaceType;
		int64		spaceUsed;

		tuplesort_get_stats(state, &stats);
		sortMethod = tuplesort_method_name(stats.sortMethod);
		spaceType = tuplesort_space_type_name(stats.spaceType);
		spaceUsed = stats.spaceUsed;

		if (es->format == EXPLAIN_FORMAT_TEXT)
		{
			ExplainIndentText(es);
			appendStringInfo(es->str, "Sort Method: %s %s: " INT64_FORMAT "kB\n",
							 sortMethod, spaceType, spaceUsed);
		}
		else
		{
			ExplainPropertyText("Sort Method", sortMethod, es);
			ExplainPropertyInteger("Sort Space Used", "kB", spaceUsed, es);
			ExplainPropertyText("Sort Space Type", spaceType, es);
		}
	}
}

/*
 * vci_sort_CopyCustomPlan
 *		CustomScan copy callback: deep-copy a VciSort plan node, including
 *		the per-column sort key arrays.
 */
static CustomScan *
vci_sort_CopyCustomPlan(const CustomScan *_from)
{
	const VciSort *from = (const VciSort *) _from;
	VciSort    *newnode = palloc0_object(VciSort);
	int			numCols;

	vci_copy_plan(&newnode->vci, &from->vci);

	numCols = from->numCols;

	newnode->numCols = numCols;

	if (numCols > 0)
	{
		newnode->sortColIdx = palloc_array(AttrNumber, numCols);
		newnode->sortOperators = palloc_array(Oid, numCols);
		newnode->collations = palloc_array(Oid, numCols);
		newnode->nullsFirst = palloc_array(bool, numCols);

		for (int i = 0; i < numCols; i++)
		{
			newnode->sortColIdx[i] = from->sortColIdx[i];
			newnode->sortOperators[i] = from->sortOperators[i];
			newnode->collations[i] = from->collations[i];
			newnode->nullsFirst[i] = from->nullsFirst[i];
		}
	}

	/* preserve the source node's tag on the copy */
	((Node *) newnode)->type = nodeTag((Node *) from);

	return &newnode->vci.cscan;
}

/*
 * vci_sort_SetBoundCustomScan
 *		Push a LIMIT bound down into the sort (cf. ExecSetTupleBound).
 */
static void
vci_sort_SetBoundCustomScan(const LimitState *node, CustomScanState *css)
{
	VciSortState *sortState = (VciSortState *) css;
	int64		tuples_needed = node->count + node->offset;

	/* negative test checks for overflow in sum */
	if (node->noCount || tuples_needed < 0)
	{
		/* make sure flag gets reset if needed upon rescan */
		sortState->bounded = false;
	}
	else
	{
		sortState->bounded = true;
		sortState->bound = tuples_needed;
	}
}

CustomScanMethods vci_sort_scan_methods = {
	"VCI Sort",
	vci_sort_CreateCustomScanState,
	vci_sort_CopyCustomPlan
};

/*
 * NOTE(review): entry order must match this tree's (extended)
 * CustomExecMethods layout, which appears to include SetBound — confirm
 * against vci_executor.h.
 */
CustomExecMethods vci_sort_exec_methods = {
	"VCI Sort",
	vci_sort_BeginCustomPlan,
	vci_sort_ExecCustomPlan,
	vci_sort_EndCustomPlan,
	vci_sort_ReScanCustomPlan,
	vci_sort_MarkPosCustomPlan,
	vci_sort_RestrPosCustomPlan,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	vci_sort_ExplainCustomPlan,
	vci_sort_SetBoundCustomScan,
	NULL
};
diff --git a/contrib/vci/executor/vci_vector_executor.c b/contrib/vci/executor/vci_vector_executor.c
new file mode 100644
index 0000000..41be3d2
--- /dev/null
+++ b/contrib/vci/executor/vci_vector_executor.c
@@ -0,0 +1,2301 @@
/*-------------------------------------------------------------------------
 *
 * vci_vector_executor.c
 *	  Routines to build and evaluate vector processing object
 *
 * Portions Copyright (c) 2025, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  contrib/vci/executor/vci_vector_executor.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "access/nbtree.h"
#include "access/relscan.h"
#include "access/transam.h"
#include "access/tupconvert.h"
#include "catalog/index.h"
#include "catalog/objectaccess.h"
#include "catalog/pg_type.h"
#include "catalog/pg_proc.h"
#include "commands/typecmds.h"
#include "executor/execdebug.h"
#include "executor/executor.h"
#include "executor/nodeCustom.h"
#include "executor/nodeSubplan.h"
#include "fmgr.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "nodes/execnodes.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/nodes.h"
#include "optimizer/planner.h"
#include "parser/parse_coerce.h"
#include "parser/parsetree.h"
#include "pgstat.h"
#include "storage/lmgr.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/typcache.h"
#include "utils/xml.h"

#include "vci.h"
#include "vci_executor.h"
#include "vci_utils.h"

/* Private Structure to Vector processing */
typedef struct FuncExprinfo
{
	FmgrInfo   *finfo;			/* function lookup data */
	FunctionCallInfo fcinfo_data;	/* arguments etc */
	PGFunction	fn_addr;		/* actual call address */
	int			nargs;			/* number of arguments */
	List	   *args;			/* states of argument expressions */
	Oid			funcid;			/* OID of the function being called */
	Oid			inputcollid;	/* collation to pass to the function */
} FuncExprinfo;

/*
 * VciScalarArrayOpExprHashEntry
 *		Hash table entry type used during VciVPExecHashedScalarArrayOpExpr
 *		Copied from OSS ScalarArrayOpExprHashEntry
 */
typedef struct VciScalarArrayOpExprHashEntry
{
	Datum		key;
	uint32		status;			/* hash status */
	uint32		hash;			/* hash value (cached) */
} VciScalarArrayOpExprHashEntry;

/*
 * First simplehash inclusion only *declares* the saophash type and
 * prototypes; the full definition follows below once the hash/match
 * support functions have been declared.
 */
#define SH_PREFIX saophash
#define SH_ELEMENT_TYPE VciScalarArrayOpExprHashEntry
#define SH_KEY_TYPE Datum
#define SH_SCOPE static inline
#define SH_DECLARE
#include "lib/simplehash.h"

static bool saop_hash_element_match(struct saophash_hash *tb, Datum key1,
									Datum key2);
static uint32 saop_element_hash(struct saophash_hash *tb, Datum key);

/*
 * VciScalarArrayOpExprHashTable
 *		Hash table for VciVPExecHashedScalarArrayOpExpr
 *		Copied from OSS ScalarArrayOpExprHashTable
 */
typedef struct VciScalarArrayOpExprHashTable
{
	saophash_hash *hashtab;		/* underlying hash table */
	struct VciVPNode *pnode;	/* owning VP node; carries fcinfo/fn_addr
								 * used by the hash and match callbacks */
} VciScalarArrayOpExprHashTable;

/* Define parameters for ScalarArrayOpExpr hash table code generation. */
#define SH_PREFIX saophash
#define SH_ELEMENT_TYPE VciScalarArrayOpExprHashEntry
#define SH_KEY_TYPE Datum
#define SH_KEY key
#define SH_HASH_KEY(tb, key) saop_element_hash(tb, key)
#define SH_EQUAL(tb, a, b) saop_hash_element_match(tb, a, b)
#define SH_SCOPE static inline
#define SH_STORE_HASH
#define SH_GET_HASH(tb, a) a->hash
#define SH_DEFINE
#include "lib/simplehash.h"

/*
 * Per-opcode evaluation callbacks for the vector-processing interpreter.
 * Each one evaluates its expression for every live slot (see the skip_list
 * iteration idiom in the definitions below).
 */
static void VciVPExecFunc(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecFunc_arg0(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecFunc_arg1(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecFunc_arg2(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecDistinctExpr(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecNullIfExpr(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecScalarArrayOpExpr(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecHashedScalarArrayOpExpr(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecNullTest(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecBooleanTest(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecNot(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecAnd_head(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecAnd_next(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecAnd_nullasfalse_next(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecOr_head(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecOr_next(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecMinMax_head(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecMinMax_next(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecCoalesce_head(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecCoalesce_next(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecCase_head(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecCase_arg(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecCase_cond(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecCase_result(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecCaseTest(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecParamExec(Expr *expression, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecCoerceViaIO(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecVar(Expr *expression, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void VciVPExecConst(Expr *expression, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);
static void vci_vp_exec_simple_copy(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots);

/* Construction helpers for building the VP node program from an Expr tree. */
static VciVPContext *vci_create_vp_context(void);
static vci_vp_item_id vci_add_vp_node(VciVPExecOp_func func, Expr *expr, VciVPContext *vpcontext, int len_args, vci_vp_item_id *arg_items, bool allocValueAndIsNull, uint16 *skip_list);
static vci_vp_item_id vci_add_var_node(Var *variable, PlanState *parent, VciVPContext *vpcontext, uint16 *skip_list);
static vci_vp_item_id vci_add_param_node(Param *param, PlanState *parent, VciVPContext *vpcontext, uint16 *skip_list);
static vci_vp_item_id vci_add_const_node(Const *con, VciVPContext *vpcontext, uint16 *skip_list);
static vci_vp_item_id vci_add_control_nodes(VciVPExecOp_func head_func, VciVPExecOp_func next_func, List *args, Expr *expr, PlanState *parent, ExprContext *econtext, VciVPContext *vpcontext, uint16 *skip_list);
static vci_vp_item_id traverse_expr_state_tree(Expr *node, PlanState *parent, ExprContext *econtext, VciVPContext *vpcontext, uint16 *skip_list);

static void VciVPExecInitFunc(Expr *node, List *args, Oid funcid, Oid inputcollid, PlanState *parent, FuncExprinfo *funcinfo);
static vci_vp_item_id vci_add_func_expr_node(Expr *expr, VciVPContext *vpcontext, FuncExprinfo *funcinfo, PlanState *parent, ExprContext *econtext, uint16 *skip_list);
static Datum VciExecEvalParamExec_vp(VciVPNode *vpnode, ExprContext *econtext, bool *isNull);
/*****************************************************************************
 * Vector processing execution function
 *****************************************************************************/

/**
 * Execute vector processing
 *
 * Runs every VP node in the program, in index order.  Nodes are stored in
 * dependency order, so by the time a node runs its argument nodes have
 * already filled their itemValue/itemIsNull vectors.  Evaluation starts at
 * item 1 (item 0 appears to be unused/reserved — confirm in
 * vci_add_vp_node).
 *
 * The skip_list idiom used by every callback below: skip_list[0] is the
 * first live slot index; after processing slot i, the next live slot is
 * i + skip_list[i + 1] + 1.  Dead (filtered-out) slots are never touched.
 *
 * @param[in,out] vpcontext  program and per-node result vectors
 * @param[in] econtext       expression context for per-tuple evaluation
 * @param[in] max_slots      number of slots in the current batch
 */
void
VciExecEvalVectorProcessing(VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
{
	vci_vp_item_id max;

	max = vpcontext->num_item;

	for (vci_vp_item_id i = 1; i < max; i++)
	{
		VciVPNode  *vpnode = &vpcontext->itemNode[i];

		vpnode->evalfunc(vpnode->expr, vpnode, vpcontext, econtext, max_slots);
	}
}

/*
 * VciVPExecFunc
 *		General N-argument function call, one invocation per live slot.
 *		For strict functions, any NULL argument short-circuits to a NULL
 *		result without calling the function (value/isnull defaults).
 */
static void
VciVPExecFunc(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
{
	uint16	   *skip_list = vpnode->skip_list;
	Datum	   *itemValue = vpnode->itemValue;
	bool	   *itemIsNull = vpnode->itemIsNull;

	FunctionCallInfo fcinfo;
	PgStat_FunctionCallUsage fcusage;

	/* inlined, simplified version of ExecEvalFuncArgs */
	fcinfo = vpnode->data.func.fcinfo_data;

	for (int slot_index = skip_list[0];
		 slot_index < max_slots;
		 slot_index += skip_list[slot_index + 1] + 1)
	{
		int			i;
		Datum		value = (Datum) 0;
		bool		isnull = true;

		/* gather this slot's argument values from the argument nodes */
		for (i = 0; i < vpnode->len_args; i++)
		{
			vci_vp_item_id item = vpnode->arg_items[i];
			VciVPNode  *arg_node = &vpcontext->itemNode[item];

			fcinfo->args[i].value = arg_node->itemValue[slot_index];
			fcinfo->args[i].isnull = arg_node->itemIsNull[slot_index];
		}

		if (vpnode->data.func.finfo->fn_strict)
		{
			/* strict: NULL in -> NULL out, skip the call */
			while (--i >= 0)
			{
				if (fcinfo->args[i].isnull)
				{
					goto done;
				}
			}
		}

		pgstat_init_function_usage(fcinfo, &fcusage);

		fcinfo->isnull = false;
		value = FunctionCallInvoke(fcinfo);
		isnull = fcinfo->isnull;

		pgstat_end_function_usage(&fcusage, true);

done:
		itemValue[slot_index] = value;
		itemIsNull[slot_index] = isnull;
	}
}

/*
 * VciVPExecFunc_arg0
 *		Specialization of VciVPExecFunc for zero-argument functions.
 *		Per-call function-usage stats are intentionally skipped in the
 *		specialized variants (commented out below).
 */
static void
VciVPExecFunc_arg0(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
{
	uint16	   *skip_list = vpnode->skip_list;
	Datum	   *itemValue = vpnode->itemValue;
	bool	   *itemIsNull = vpnode->itemIsNull;

	FunctionCallInfo fcinfo;

	/* PgStat_FunctionCallUsage fcusage; */

	/* inlined, simplified version of ExecEvalFuncArgs */
	fcinfo = vpnode->data.func.fcinfo_data;

	for (int slot_index = skip_list[0];
		 slot_index < max_slots;
		 slot_index += skip_list[slot_index + 1] + 1)
	{
		Datum		value = (Datum) 0;
		bool		isnull = true;

		/* pgstat_init_function_usage(fcinfo, &fcusage); */

		fcinfo->isnull = false;
		value = FunctionCallInvoke(fcinfo);
		isnull = fcinfo->isnull;

		/* pgstat_end_function_usage(&fcusage, true); */

		itemValue[slot_index] = value;
		itemIsNull[slot_index] = isnull;
	}
}

/*
 * VciVPExecFunc_arg1
 *		Specialization of VciVPExecFunc for one-argument functions.
 */
static void
VciVPExecFunc_arg1(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
{
	uint16	   *skip_list = vpnode->skip_list;
	Datum	   *itemValue = vpnode->itemValue;
	bool	   *itemIsNull = vpnode->itemIsNull;

	FunctionCallInfo fcinfo;

	/* PgStat_FunctionCallUsage fcusage; */

	VciVPNode  *arg_node;

	/* inlined, simplified version of ExecEvalFuncArgs */
	fcinfo = vpnode->data.func.fcinfo_data;

	arg_node = &vpcontext->itemNode[vpnode->arg_items[0]];

	for (int slot_index = skip_list[0];
		 slot_index < max_slots;
		 slot_index += skip_list[slot_index + 1] + 1)
	{
		Datum		value = (Datum) 0;
		bool		isnull = true;

		fcinfo->args[0].value = arg_node->itemValue[slot_index];
		fcinfo->args[0].isnull = arg_node->itemIsNull[slot_index];

		/* strict: NULL in -> NULL out, skip the call */
		if (vpnode->data.func.finfo->fn_strict)
			if (fcinfo->args[0].isnull)
				goto done;

		/* pgstat_init_function_usage(fcinfo, &fcusage); */

		fcinfo->isnull = false;
		value = FunctionCallInvoke(fcinfo);
		isnull = fcinfo->isnull;

		/* pgstat_end_function_usage(&fcusage, true); */

done:
		itemValue[slot_index] = value;
		itemIsNull[slot_index] = isnull;
	}
}

/*
 * VciVPExecFunc_arg2
 *		Specialization of VciVPExecFunc for two-argument functions
 *		(the common case for operators).
 */
static void
VciVPExecFunc_arg2(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
{
	uint16	   *skip_list = vpnode->skip_list;
	Datum	   *itemValue = vpnode->itemValue;
	bool	   *itemIsNull = vpnode->itemIsNull;

	FunctionCallInfo fcinfo;

	/* PgStat_FunctionCallUsage fcusage; */

	VciVPNode  *arg_node0,
			   *arg_node1;

	/* inlined, simplified version of ExecEvalFuncArgs */
	fcinfo = vpnode->data.func.fcinfo_data;

	arg_node0 = &vpcontext->itemNode[vpnode->arg_items[0]];
	arg_node1 = &vpcontext->itemNode[vpnode->arg_items[1]];

	for (int slot_index = skip_list[0];
		 slot_index < max_slots;
		 slot_index += skip_list[slot_index + 1] + 1)
	{
		Datum		value = (Datum) 0;
		bool		isnull = true;

		fcinfo->args[0].value = arg_node0->itemValue[slot_index];
		fcinfo->args[0].isnull = arg_node0->itemIsNull[slot_index];

		fcinfo->args[1].value = arg_node1->itemValue[slot_index];
		fcinfo->args[1].isnull = arg_node1->itemIsNull[slot_index];

		/* strict: NULL in -> NULL out, skip the call */
		if (vpnode->data.func.finfo->fn_strict)
			if (fcinfo->args[0].isnull || fcinfo->args[1].isnull)
				goto done;

		/* pgstat_init_function_usage(fcinfo, &fcusage); */

		fcinfo->isnull = false;
		value = FunctionCallInvoke(fcinfo);
		isnull = fcinfo->isnull;

		/* pgstat_end_function_usage(&fcusage, true); */

done:
		itemValue[slot_index] = value;
		itemIsNull[slot_index] = isnull;
	}
}

/*
 * VciVPExecDistinctExpr
 *		IS DISTINCT FROM: like the "=" operator but with NULL-aware
 *		semantics and the equality result inverted (cf. ExecEvalDistinct).
 *		The result itself is never NULL.
 */
static void
VciVPExecDistinctExpr(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
{
	uint16	   *skip_list = vpnode->skip_list;
	Datum	   *itemValue = vpnode->itemValue;
	bool	   *itemIsNull = vpnode->itemIsNull;

	FunctionCallInfo fcinfo;

	/* inlined, simplified version of ExecEvalFuncArgs */
	fcinfo = vpnode->data.func.fcinfo_data;

	for (int slot_index = skip_list[0];
		 slot_index < max_slots;
		 slot_index += skip_list[slot_index + 1] + 1)
	{
		Datum		value = (Datum) 0;
		bool		isnull = false;

		Assert(vpnode->len_args == 2);

		for (int i = 0; i < 2; i++)
		{
			vci_vp_item_id item = vpnode->arg_items[i];
			VciVPNode  *arg_node = &vpcontext->itemNode[item];

			fcinfo->args[i].value = arg_node->itemValue[slot_index];
			fcinfo->args[i].isnull = arg_node->itemIsNull[slot_index];
		}

		if (fcinfo->args[0].isnull && fcinfo->args[1].isnull)
		{
			/* Both NULL? Then is not distinct... */
			value = BoolGetDatum(false);
		}
		else if (fcinfo->args[0].isnull || fcinfo->args[1].isnull)
		{
			/* Only one is NULL? Then is distinct... */
			value = BoolGetDatum(true);
		}
		else
		{
			fcinfo->isnull = false;
			value = FunctionCallInvoke(fcinfo);
			isnull = fcinfo->isnull;
			/* Must invert result of "=" */
			value = BoolGetDatum(!DatumGetBool(value));
		}

		itemValue[slot_index] = value;
		itemIsNull[slot_index] = isnull;
	}
}

/*
 * VciVPExecNullIfExpr
 *		NULLIF(a, b): NULL when "=" says the two arguments are equal,
 *		otherwise the first argument (cf. ExecEvalNullIf).
 */
static void
VciVPExecNullIfExpr(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
{
	uint16	   *skip_list = vpnode->skip_list;
	Datum	   *itemValue = vpnode->itemValue;
	bool	   *itemIsNull = vpnode->itemIsNull;

	FunctionCallInfo fcinfo;

	/* inlined, simplified version of ExecEvalFuncArgs */
	fcinfo = vpnode->data.func.fcinfo_data;

	for (int slot_index = skip_list[0];
		 slot_index < max_slots;
		 slot_index += skip_list[slot_index + 1] + 1)
	{
		Datum		value = (Datum) 0;
		bool		isnull = false;

		Assert(vpnode->len_args == 2);

		for (int i = 0; i < 2; i++)
		{
			vci_vp_item_id item = vpnode->arg_items[i];
			VciVPNode  *arg_node = &vpcontext->itemNode[item];

			fcinfo->args[i].value = arg_node->itemValue[slot_index];
			fcinfo->args[i].isnull = arg_node->itemIsNull[slot_index];
		}

		/* if either argument is NULL they can't be equal */
		if (!fcinfo->args[0].isnull && !fcinfo->args[1].isnull)
		{
			fcinfo->isnull = false;
			value = FunctionCallInvoke(fcinfo);
			/* if the arguments are equal return null */
			if (!fcinfo->isnull && DatumGetBool(value))
			{
				value = (Datum) 0;
				isnull = true;
				goto equal_two_arguments;
			}
		}

		/* not equal (or not comparable): result is the first argument */
		value = fcinfo->args[0].value;
		isnull = fcinfo->args[0].isnull;

equal_two_arguments:
		itemValue[slot_index] = value;
		itemIsNull[slot_index] = isnull;
	}
}

/*
 * VciVPExecScalarArrayOpExpr
 *		scalar op ANY/ALL (array): iterate the (possibly null-bitmapped)
 *		array elements and combine the per-element operator results with
 *		OR (useOr) or AND semantics (cf. ExecEvalScalarArrayOp).
 */
static void
VciVPExecScalarArrayOpExpr(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
{
	uint16	   *skip_list = vpnode->skip_list;
	Datum	   *itemValue = vpnode->itemValue;
	bool	   *itemIsNull = vpnode->itemIsNull;

	bool		useOr = vpnode->data.scalararrayop.useOr;
	FunctionCallInfo fcinfo;

	fcinfo = vpnode->data.scalararrayop.fcinfo_data;

	for (int slot_index = skip_list[0];
		 slot_index < max_slots;
		 slot_index += skip_list[slot_index + 1] + 1)
	{
		ArrayType  *arr;
		int			nitems;
		Datum		result;
		bool		resultnull;
		int16		typlen;
		bool		typbyval;
		char		typalign;
		char	   *s;
		bits8	   *bitmap;
		int			bitmask;

		result = (Datum) 0;
		resultnull = false;		/* Set default values for result flags:
								 * non-null, not a set result */

		/* args[0] = scalar LHS, args[1] = array RHS */
		for (int i = 0; i < 2; i++)
		{
			vci_vp_item_id item = vpnode->arg_items[i];
			VciVPNode  *arg_node = &vpcontext->itemNode[item];

			fcinfo->args[i].value = arg_node->itemValue[slot_index];
			fcinfo->args[i].isnull = arg_node->itemIsNull[slot_index];
		}

		/*
		 * If the array is NULL then we return NULL --- it's not very
		 * meaningful to do anything else, even if the operator isn't strict.
		 */
		if (fcinfo->args[1].isnull)
		{
			result = (Datum) 0;
			resultnull = true;
			goto done;
		}

		/* Else okay to fetch and detoast the array */
		arr = DatumGetArrayTypeP(fcinfo->args[1].value);

		/*
		 * If the array is empty, we return either FALSE or TRUE per the useOr
		 * flag. This is correct even if the scalar is NULL; since we would
		 * evaluate the operator zero times, it matters not whether it would
		 * want to return NULL.
		 */
		nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
		if (nitems <= 0)
		{
			result = BoolGetDatum(!useOr);
			goto done;
		}

		/*
		 * If the scalar is NULL, and the function is strict, return NULL; no
		 * point in iterating the loop.
		 */
		if (fcinfo->args[0].isnull && vpnode->data.scalararrayop.finfo->fn_strict)
		{
			result = (Datum) 0;
			resultnull = true;
			goto done;
		}

		/*
		 * We arrange to look up info about the element type only once per
		 * series of calls, assuming the element type doesn't change
		 * underneath us.
		 */
		if (vpnode->data.scalararrayop.element_type != ARR_ELEMTYPE(arr))
		{
			get_typlenbyvalalign(ARR_ELEMTYPE(arr),
								 &vpnode->data.scalararrayop.typlen,
								 &vpnode->data.scalararrayop.typbyval,
								 &vpnode->data.scalararrayop.typalign);
			vpnode->data.scalararrayop.element_type = ARR_ELEMTYPE(arr);
		}
		typlen = vpnode->data.scalararrayop.typlen;
		typbyval = vpnode->data.scalararrayop.typbyval;
		typalign = vpnode->data.scalararrayop.typalign;

		/* identity element: FALSE for OR, TRUE for AND */
		result = BoolGetDatum(!useOr);
		resultnull = false;

		/* Loop over the array elements */
		s = (char *) ARR_DATA_PTR(arr);
		bitmap = ARR_NULLBITMAP(arr);
		bitmask = 1;

		for (int i = 0; i < nitems; i++)
		{
			Datum		elt;
			Datum		thisresult;

			/* Get array element, checking for NULL */
			if (bitmap && (*bitmap & bitmask) == 0)
			{
				fcinfo->args[1].value = (Datum) 0;
				fcinfo->args[1].isnull = true;
			}
			else
			{
				elt = fetch_att(s, typbyval, typlen);
				s = att_addlength_pointer(s, typlen, s);
				s = (char *) att_align_nominal(s, typalign);
				fcinfo->args[1].value = elt;
				fcinfo->args[1].isnull = false;
			}

			/* Call comparison function */
			if (fcinfo->args[1].isnull && vpnode->data.scalararrayop.finfo->fn_strict)
			{
				fcinfo->isnull = true;
				thisresult = (Datum) 0;
			}
			else
			{
				fcinfo->isnull = false;
				thisresult = FunctionCallInvoke(fcinfo);
			}

			/* Combine results per OR or AND semantics */
			if (fcinfo->isnull)
				resultnull = true;
			else if (useOr)
			{
				if (DatumGetBool(thisresult))
				{
					result = BoolGetDatum(true);
					resultnull = false;
					break;		/* needn't look at any more elements */
				}
			}
			else
			{
				if (!DatumGetBool(thisresult))
				{
					result = BoolGetDatum(false);
					resultnull = false;
					break;		/* needn't look at any more elements */
				}
			}

			/* advance bitmap pointer if any */
			if (bitmap)
			{
				bitmask <<= 1;
				if (bitmask == 0x100)
				{
					bitmap++;
					bitmask = 1;
				}
			}
		}

done:
		itemValue[slot_index] = result;
		itemIsNull[slot_index] = resultnull;
	}
}

/*
 * Hash function for scalar array hash op elements.
 *
 * We use the element type's default hash opclass, and the column collation
 * if the type is collation-sensitive.
 */
static uint32
saop_element_hash(struct saophash_hash *tb, Datum key)
{
	VciScalarArrayOpExprHashTable *elements_tab = (VciScalarArrayOpExprHashTable *) tb->private_data;
	FunctionCallInfo fcinfo = elements_tab->pnode->data.hashedscalararrayop.fcinfo_data;
	Datum		hash;

	fcinfo->args[0].value = key;
	fcinfo->args[0].isnull = false;

	hash = elements_tab->pnode->data.hashedscalararrayop.hash_fn_addr(fcinfo);

	return DatumGetUInt32(hash);
}

/*
 * Matching function for scalar array hash op elements, to be used in hashtable
 * lookups.
 */
static bool
saop_hash_element_match(struct saophash_hash *tb, Datum key1, Datum key2)
{
	Datum		result;

	VciScalarArrayOpExprHashTable *elements_tab = (VciScalarArrayOpExprHashTable *) tb->private_data;
	FunctionCallInfo fcinfo = elements_tab->pnode->data.hashedscalararrayop.fcinfo_data;

	fcinfo->args[0].value = key1;
	fcinfo->args[0].isnull = false;
	fcinfo->args[1].value = key2;
	fcinfo->args[1].isnull = false;

	result = elements_tab->pnode->data.hashedscalararrayop.fn_addr(fcinfo);

	return DatumGetBool(result);
}

/*
 * VciVPExecHashedScalarArrayOpExpr
 *		"scalar = ANY (array)" accelerated by a hash table over the array
 *		elements, built lazily on first evaluation (cf. OSS
 *		ExecEvalHashedScalarArrayOp).
 */
static void
VciVPExecHashedScalarArrayOpExpr(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
{
	uint16	   *skip_list = vpnode->skip_list;
	Datum	   *itemValue = vpnode->itemValue;
	bool	   *itemIsNull = vpnode->itemIsNull;

	FunctionCallInfo fcinfo;
	bool		strictfunc;
	Datum		scalar;
	bool		scalar_isnull;
	VciScalarArrayOpExprHashTable *elements_tab;

	fcinfo = vpnode->data.hashedscalararrayop.fcinfo_data;
	strictfunc = vpnode->data.hashedscalararrayop.finfo->fn_strict;
	elements_tab = vpnode->data.hashedscalararrayop.elements_tab;

	for (int slot_index = skip_list[0];
		 slot_index < max_slots;
		 slot_index += skip_list[slot_index + 1] + 1)
	{
		Datum		result;
		bool		resultnull;
		bool		hashfound;

		/* We don't setup a hashed scalar array op if the array const is null. */
		Assert(!fcinfo->args[1].isnull);

		for (int i = 0; i < 2; i++)
		{
			vci_vp_item_id item = vpnode->arg_items[i];
			VciVPNode  *arg_node = &vpcontext->itemNode[item];

			fcinfo->args[i].value = arg_node->itemValue[slot_index];
			fcinfo->args[i].isnull = arg_node->itemIsNull[slot_index];
		}
		scalar = fcinfo->args[0].value;
		scalar_isnull = fcinfo->args[0].isnull;

		/*
		 * If the scalar is NULL, and the function is strict, return NULL; no
		 * point in executing the search.
		 */
		if (fcinfo->args[0].isnull && strictfunc)
		{
			result = (Datum) 0;
			resultnull = true;
			goto done;
		}

		/* Build the hash table on first evaluation */
		if (elements_tab == NULL)
		{
			int16		typlen;
			bool		typbyval;
			char		typalign;
			int			nitems;
			bool		has_nulls = false;
			char	   *s;
			bits8	   *bitmap;
			int			bitmask;
			MemoryContext oldcontext;
			ArrayType  *arr;

			arr = DatumGetArrayTypeP(fcinfo->args[1].value);
			nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));

			get_typlenbyvalalign(ARR_ELEMTYPE(arr),
								 &typlen,
								 &typbyval,
								 &typalign);

			/* table must outlive the batch: allocate per-query */
			oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_query_memory);

			elements_tab =
				palloc_object(VciScalarArrayOpExprHashTable);
			vpnode->data.hashedscalararrayop.elements_tab = elements_tab;
			elements_tab->pnode = vpnode;

			/*
			 * Create the hash table sizing it according to the number of
			 * elements in the array. This does assume that the array has no
			 * duplicates. If the array happens to contain many duplicate
			 * values then it'll just mean that we sized the table a bit on
			 * the large side.
			 */
			elements_tab->hashtab = saophash_create(CurrentMemoryContext, nitems,
													elements_tab);

			MemoryContextSwitchTo(oldcontext);

			s = (char *) ARR_DATA_PTR(arr);
			bitmap = ARR_NULLBITMAP(arr);
			bitmask = 1;
			for (int i = 0; i < nitems; i++)
			{
				/* Get array element, checking for NULL. */
				if (bitmap && (*bitmap & bitmask) == 0)
				{
					has_nulls = true;
				}
				else
				{
					Datum		element;

					element = fetch_att(s, typbyval, typlen);
					s = att_addlength_pointer(s, typlen, s);
					s = (char *) att_align_nominal(s, typalign);

					saophash_insert(elements_tab->hashtab, element, &hashfound);
				}

				/* Advance bitmap pointer if any. */
				if (bitmap)
				{
					bitmask <<= 1;
					if (bitmask == 0x100)
					{
						bitmap++;
						bitmask = 1;
					}
				}
			}

			/*
			 * Remember if we had any nulls so that we know if we need to
			 * execute non-strict functions with a null lhs value if no match
			 * is found.
			 */
			vpnode->data.hashedscalararrayop.has_nulls = has_nulls;
		}

		/* Check the hash to see if we have a match. */
		hashfound = NULL != saophash_lookup(elements_tab->hashtab, scalar);

		result = BoolGetDatum(hashfound);
		resultnull = false;

		/*
		 * If we didn't find a match in the array, we still might need to
		 * handle the possibility of null values. We didn't put any NULLs
		 * into the hashtable, but instead marked if we found any when
		 * building the table in has_nulls.
		 */
		if (!DatumGetBool(result) && vpnode->data.hashedscalararrayop.has_nulls)
		{
			if (strictfunc)
			{
				/*
				 * We have nulls in the array so a non-null lhs and no match
				 * must yield NULL.
				 */
				result = (Datum) 0;
				resultnull = true;
			}
			else
			{
				/*
				 * Execute function will null rhs just once.
				 *
				 * The hash lookup path will have scribbled on the lhs
				 * argument so we need to set it up also (even though we
				 * entered this function with it already set).
				 */
				fcinfo->args[0].value = scalar;
				fcinfo->args[0].isnull = scalar_isnull;
				fcinfo->args[1].value = (Datum) 0;
				fcinfo->args[1].isnull = true;

				result = FunctionCallInvoke(fcinfo);
				resultnull = fcinfo->isnull;
			}
		}

done:
		itemValue[slot_index] = result;
		itemIsNull[slot_index] = resultnull;
	}
}

/*
 * VciVPExecNullTest
 *		IS [NOT] NULL for scalar arguments (argisrow handled elsewhere;
 *		asserted away here).  Result is always non-null.
 */
static void
VciVPExecNullTest(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
{
	uint16	   *skip_list = vpnode->skip_list;
	Datum	   *itemValue = vpnode->itemValue;
	bool	   *itemIsNull = vpnode->itemIsNull;

	NullTest   *ntest = (NullTest *) expr;
	VciVPNode  *arg_node;

	arg_node = &vpcontext->itemNode[vpnode->arg_items[0]];

	for (int slot_index = skip_list[0];
		 slot_index < max_slots;
		 slot_index += skip_list[slot_index + 1] + 1)
	{
		Datum		value;
		bool		isnull;

		value = arg_node->itemValue[slot_index];
		isnull = arg_node->itemIsNull[slot_index];

		Assert(!ntest->argisrow);

		/* Simple scalar-argument case, or a null rowtype datum */
		switch (ntest->nulltesttype)
		{
			case IS_NULL:
				if (isnull)
				{
					value = BoolGetDatum(true);
					isnull = false;
				}
				else
					value = BoolGetDatum(false);
				break;

			case IS_NOT_NULL:
				if (isnull)
				{
					value = BoolGetDatum(false);
					isnull = false;
				}
				else
					value = BoolGetDatum(true);
				break;

			default:
				/* LCOV_EXCL_START */
				elog(ERROR, "unrecognized nulltesttype: %d",
					 (int) ntest->nulltesttype);
				break;
				/* LCOV_EXCL_STOP */
		}

		itemValue[slot_index] = value;
		itemIsNull[slot_index] = isnull;
	}
}

/*
 * VciVPExecBooleanTest
 *		IS [NOT] TRUE/FALSE/UNKNOWN: three-valued boolean tests; NULL input
 *		maps to a definite TRUE/FALSE result (cf. ExecEvalBooleanTest).
 */
static void
VciVPExecBooleanTest(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
{
	uint16	   *skip_list = vpnode->skip_list;
	Datum	   *itemValue = vpnode->itemValue;
	bool	   *itemIsNull = vpnode->itemIsNull;

	BooleanTest *btest = (BooleanTest *) expr;
	VciVPNode  *arg_node;

	arg_node = &vpcontext->itemNode[vpnode->arg_items[0]];

	for (int slot_index = skip_list[0];
		 slot_index < max_slots;
		 slot_index += skip_list[slot_index + 1] + 1)
	{
		Datum		value;
		bool		isnull;

		value = arg_node->itemValue[slot_index];
		isnull = arg_node->itemIsNull[slot_index];

		switch (btest->booltesttype)
		{
			case IS_TRUE:
				if (isnull)
				{
					value = BoolGetDatum(false);
					isnull = false;
				}
				else if (DatumGetBool(value))
					value = BoolGetDatum(true);
				else
					value = BoolGetDatum(false);
				break;

			case IS_NOT_TRUE:
				if (isnull)
				{
					value = BoolGetDatum(true);
					isnull = false;
				}
				else if (DatumGetBool(value))
					value = BoolGetDatum(false);
				else
					value = BoolGetDatum(true);
				break;

			case IS_FALSE:
				if (isnull)
				{
					value = BoolGetDatum(false);
					isnull = false;
				}
				else if (DatumGetBool(value))
					value = BoolGetDatum(false);
				else
					value = BoolGetDatum(true);
				break;

			case IS_NOT_FALSE:
				if (isnull)
				{
					value = BoolGetDatum(true);
					isnull = false;
				}
				else if (DatumGetBool(value))
					value = BoolGetDatum(true);
				else
					value = BoolGetDatum(false);
				break;

			case IS_UNKNOWN:
				if (isnull)
				{
					value = BoolGetDatum(true);
					isnull = false;
				}
				else
					value = BoolGetDatum(false);
				break;

			case IS_NOT_UNKNOWN:
				if (isnull)
				{
					value = BoolGetDatum(false);
					isnull = false;
				}
				else
					value = BoolGetDatum(true);
				break;

			default:
				/* LCOV_EXCL_START */
				elog(ERROR, "unrecognized booltesttype: %d",
					 (int) btest->booltesttype);
				break;
				/* LCOV_EXCL_STOP */
		}

		itemValue[slot_index] = value;
		itemIsNull[slot_index] = isnull;
	}
}

/*
 * VciVPExecNot
 *		Logical NOT with three-valued semantics: NULL stays NULL,
 *		otherwise the boolean is inverted.
 */
static void
VciVPExecNot(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
{
	uint16	   *skip_list = vpnode->skip_list;
	Datum	   *itemValue = vpnode->itemValue;
	bool	   *itemIsNull = vpnode->itemIsNull;

	VciVPNode  *arg_node;

	arg_node = &vpcontext->itemNode[vpnode->arg_items[0]];

	for (int slot_index = skip_list[0];
		 slot_index < max_slots;
		 slot_index += skip_list[slot_index + 1] + 1)
	{
		Datum		value;
		bool		isnull;

		value = arg_node->itemValue[slot_index];
		isnull = arg_node->itemIsNull[slot_index];

		if (isnull)
			value = (Datum) 0;
		else
			value = BoolGetDatum(!DatumGetBool(value));

		itemValue[slot_index] = value;
		itemIsNull[slot_index] = isnull;
	}
}

/*
 * VciVPExecAnd_head
 *		Start an AND chain: result vector is all TRUE / non-null, and this
 *		node's private skip list is reset to the original (all-live) list so
 *		the _next steps can progressively shrink it.
 */
static void
VciVPExecAnd_head(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
{
	for (int i = 0; i < VCI_MAX_FETCHING_ROWS; i++)
		vpnode->itemValue[i] = BoolGetDatum(true);

	memset(vpnode->itemIsNull, 0, sizeof(bool) * VCI_MAX_FETCHING_ROWS);
	memcpy(vpnode->skip_list, vpnode->data.init.orig_skip_list, sizeof(uint16) * VCI_MAX_SKIP_LIST_SLOTS);
}

/*
 * VciVPExecAnd_next
 *		Fold one AND operand into the accumulated result.  A definite FALSE
 *		removes the slot from this node's skip list (later operands won't
 *		evaluate it); a NULL keeps the slot live with a NULL accumulator,
 *		per three-valued AND.  check_slot_index tracks the skip-list entry
 *		whose gap gets extended when a slot is removed.
 */
static void
VciVPExecAnd_next(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
{
	uint16	   *skip_list = vpnode->skip_list;
	Datum	   *itemValue = vpnode->itemValue;
	bool	   *itemIsNull = vpnode->itemIsNull;

	int			check_slot_index = 0;
	VciVPNode  *arg_node;

	arg_node = &vpcontext->itemNode[vpnode->arg_items[0]];

	for (int slot_index = skip_list[0];
		 slot_index < max_slots;
		 slot_index += skip_list[slot_index + 1] + 1)
	{
		Datum		value;
		bool		isnull;

		value = arg_node->itemValue[slot_index];
		isnull = arg_node->itemIsNull[slot_index];

		if (isnull)
		{
			/* NULL operand: accumulator becomes NULL, slot stays live */
			itemValue[slot_index] = (Datum) 0;
			itemIsNull[slot_index] = true;

			check_slot_index = slot_index + 1;
		}
		else if (!DatumGetBool(value))
		{
			/* definite FALSE: record it and drop the slot from the list */
			itemValue[slot_index] = BoolGetDatum(false);
			itemIsNull[slot_index] = false;

			skip_list[check_slot_index] += skip_list[slot_index + 1] + 1;
		}
		else
		{
			/* TRUE: accumulator unchanged, slot stays live */
			check_slot_index = slot_index + 1;
		}
	}
}

/*
 * VciVPExecAnd_nullasfalse_next
 *		Variant of VciVPExecAnd_next for contexts where NULL can be treated
 *		as FALSE (e.g. a qual result): NULL operands drop the slot too.
 */
static void
VciVPExecAnd_nullasfalse_next(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
{
	uint16	   *skip_list = vpnode->skip_list;
	Datum	   *itemValue = vpnode->itemValue;
	bool	   *itemIsNull = vpnode->itemIsNull;

	int			check_slot_index = 0;
	VciVPNode  *arg_node;

	arg_node = &vpcontext->itemNode[vpnode->arg_items[0]];

	for (int slot_index = skip_list[0];
		 slot_index < max_slots;
		 slot_index += skip_list[slot_index + 1] + 1)
	{
		Datum		value;
		bool		isnull;

		value = arg_node->itemValue[slot_index];
		isnull = arg_node->itemIsNull[slot_index];

		if (isnull || !DatumGetBool(value))
		{
			itemValue[slot_index] = BoolGetDatum(false);
			itemIsNull[slot_index] = false;

			skip_list[check_slot_index] += skip_list[slot_index + 1] + 1;
		}
		else
		{
			check_slot_index = slot_index + 1;
		}
	}
}

/*
 * VciVPExecOr_head
 *		Start an OR chain: result vector is all FALSE / non-null, and this
 *		node's private skip list is reset to the original (all-live) list.
 */
static void
VciVPExecOr_head(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
{
	for (int i = 0; i < VCI_MAX_FETCHING_ROWS; i++)
		vpnode->itemValue[i] = BoolGetDatum(false);

	memset(vpnode->itemIsNull, 0, sizeof(bool) * VCI_MAX_FETCHING_ROWS);
	memcpy(vpnode->skip_list, vpnode->data.init.orig_skip_list, sizeof(uint16) * VCI_MAX_SKIP_LIST_SLOTS);
}

static void
VciVPExecOr_next(Expr *expr, VciVPNode *vpnode,
VciVPContext *vpcontext, ExprContext *econtext, int max_slots) +{ + uint16 *skip_list = vpnode->skip_list; + Datum *itemValue = vpnode->itemValue; + bool *itemIsNull = vpnode->itemIsNull; + + int check_slot_index = 0; + VciVPNode *arg_node; + + arg_node = &vpcontext->itemNode[vpnode->arg_items[0]]; + + for (int slot_index = skip_list[0]; + slot_index < max_slots; + slot_index += skip_list[slot_index + 1] + 1) + { + Datum value; + bool isnull; + + value = arg_node->itemValue[slot_index]; + isnull = arg_node->itemIsNull[slot_index]; + + if (isnull) + { + itemValue[slot_index] = (Datum) 0; + itemIsNull[slot_index] = true; + + check_slot_index = slot_index + 1; + } + else if (DatumGetBool(value)) + { + itemValue[slot_index] = BoolGetDatum(true); + itemIsNull[slot_index] = false; + + skip_list[check_slot_index] += skip_list[slot_index + 1] + 1; + } + else + { + check_slot_index = slot_index + 1; + } + } +} + +static void +VciVPExecMinMax_head(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots) +{ + for (int i = 0; i < VCI_MAX_FETCHING_ROWS; i++) + vpnode->itemIsNull[i] = true; + + memcpy(vpnode->skip_list, vpnode->data.init.orig_skip_list, sizeof(uint16) * VCI_MAX_SKIP_LIST_SLOTS); +} + +static void +VciVPExecMinMax_next(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots) +{ + uint16 *skip_list = vpnode->skip_list; + Datum *itemValue = vpnode->itemValue; + bool *itemIsNull = vpnode->itemIsNull; + + VciVPNode *arg_node; + FmgrInfo *finfo; + TypeCacheEntry *typentry; + + MinMaxExpr *minmax = (MinMaxExpr *) expr; + Oid collation = minmax->inputcollid; + MinMaxOp op = minmax->op; + + arg_node = &vpcontext->itemNode[vpnode->arg_items[0]]; + + finfo = palloc0_object(FmgrInfo); /* will be freed as part of query + * context free */ + + /* Look up the btree comparison function for the datatype */ + typentry = lookup_type_cache(minmax->minmaxtype, + TYPECACHE_CMP_PROC); + + if 
(!OidIsValid(typentry->cmp_proc)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify a comparison function for type %s", + format_type_be(minmax->minmaxtype)))); + + fmgr_info(typentry->cmp_proc, finfo); + fmgr_info_set_expr((Node *) expr, finfo); + + for (int slot_index = skip_list[0]; + slot_index < max_slots; + slot_index += skip_list[slot_index + 1] + 1) + { + Datum value; + bool isnull; + + LOCAL_FCINFO(locfcinfo, 2); + int32 cmpresult; + + InitFunctionCallInfoData(*locfcinfo, finfo, 2, + collation, NULL, NULL); + locfcinfo->args[0].isnull = false; + locfcinfo->args[1].isnull = false; + + value = arg_node->itemValue[slot_index]; + isnull = arg_node->itemIsNull[slot_index]; + + if (isnull) + { + } + else if (itemIsNull[slot_index] == true) + { + /* first nonnull input, adopt value */ + itemValue[slot_index] = value; + itemIsNull[slot_index] = false; + } + else + { + /* apply comparison function */ + locfcinfo->args[0].value = itemValue[slot_index]; + locfcinfo->args[1].value = value; + locfcinfo->isnull = false; + cmpresult = DatumGetInt32(FunctionCallInvoke(locfcinfo)); + if (cmpresult > 0 && op == IS_LEAST) + itemValue[slot_index] = value; + else if (cmpresult < 0 && op == IS_GREATEST) + itemValue[slot_index] = value; + } + } +} + +static void +VciVPExecCoalesce_head(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots) +{ + for (int i = 0; i < VCI_MAX_FETCHING_ROWS; i++) + { + vpnode->itemValue[i] = (Datum) 0; + vpnode->itemIsNull[i] = true; + } + + memcpy(vpnode->skip_list, vpnode->data.init.orig_skip_list, sizeof(uint16) * VCI_MAX_SKIP_LIST_SLOTS); +} + +static void +VciVPExecCoalesce_next(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots) +{ + uint16 *skip_list = vpnode->skip_list; + Datum *itemValue = vpnode->itemValue; + bool *itemIsNull = vpnode->itemIsNull; + + int check_slot_index = 0; + VciVPNode *arg_node; + + arg_node = 
&vpcontext->itemNode[vpnode->arg_items[0]]; + + for (int slot_index = skip_list[0]; + slot_index < max_slots; + slot_index += skip_list[slot_index + 1] + 1) + { + Datum value; + bool isnull; + + value = arg_node->itemValue[slot_index]; + isnull = arg_node->itemIsNull[slot_index]; + + if (isnull) + { + itemValue[slot_index] = (Datum) 0; + itemIsNull[slot_index] = true; + + check_slot_index = slot_index + 1; + } + else + { + itemValue[slot_index] = value; + itemIsNull[slot_index] = false; + + skip_list[check_slot_index] += skip_list[slot_index + 1] + 1; + } + } +} + +static void +VciVPExecCase_head(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots) +{ + for (int i = 0; i < VCI_MAX_FETCHING_ROWS; i++) + { + vpnode->itemValue[i] = (Datum) 0; + vpnode->itemIsNull[i] = true; + } + + memcpy(vpnode->skip_list, vpnode->data.init.orig_skip_list, sizeof(uint16) * VCI_MAX_SKIP_LIST_SLOTS); +} + +static void +VciVPExecCase_arg(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots) +{ + vci_vp_exec_simple_copy(expr, vpnode, vpcontext, econtext, max_slots); +} + +static void +VciVPExecCase_cond(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots) +{ + uint16 *skip_list0 = vpnode->data.init.orig_skip_list; + uint16 *skip_list1 = vpnode->skip_list; + Datum *itemValue = vpnode->itemValue; + bool *itemIsNull = vpnode->itemIsNull; + + int check_slot_index0 = 0; + int check_slot_index1 = 0; + VciVPNode *arg_node; + + arg_node = &vpcontext->itemNode[vpnode->arg_items[0]]; + + memcpy(skip_list1, skip_list0, sizeof(uint16) * VCI_MAX_SKIP_LIST_SLOTS); + + for (int slot_index0 = skip_list0[0], + slot_index1 = skip_list1[0]; + slot_index0 < max_slots; + slot_index0 += skip_list0[slot_index0 + 1] + 1, + slot_index1 += skip_list1[slot_index1 + 1] + 1) + { + Datum clause_value; + bool isnull; + + clause_value = arg_node->itemValue[slot_index0]; + isnull = 
arg_node->itemIsNull[slot_index0];
+
+		if (DatumGetBool(clause_value) && !isnull)
+		{
+			itemValue[slot_index0] = arg_node->itemValue[slot_index0];
+			itemIsNull[slot_index0] = arg_node->itemIsNull[slot_index0];
+
+			skip_list0[check_slot_index0] += skip_list0[slot_index0 + 1] + 1;
+			check_slot_index1 = slot_index1 + 1;
+		}
+		else
+		{
+			check_slot_index0 = slot_index0 + 1;
+			skip_list1[check_slot_index1] += skip_list1[slot_index1 + 1] + 1;
+		}
+	}
+}
+
+/* CASE THEN/ELSE arm: copy the arm's values into the CASE result buffers. */
+static void
+VciVPExecCase_result(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
+{
+	vci_vp_exec_simple_copy(expr, vpnode, vpcontext, econtext, max_slots);
+}
+
+/*
+ * CaseTestExpr placeholder: its value/isnull buffers alias the enclosing
+ * CASE node's caseValue buffers (wired up at build time), so evaluation is
+ * a no-op.
+ */
+static void
+VciVPExecCaseTest(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
+{
+	/* Do nothing */
+}
+
+/*
+ * Evaluate a PARAM_EXEC parameter and broadcast the result to every live
+ * slot.  The parameter is row-invariant, so it is evaluated only once, for
+ * the first live slot of the batch.
+ */
+static void
+VciVPExecParamExec(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
+{
+	uint16	   *skip_list = vpnode->skip_list;
+	Datum	   *itemValue = vpnode->itemValue;
+	bool	   *itemIsNull = vpnode->itemIsNull;
+
+	bool		first_eval_exec = false;
+	Datum		paramValue;
+	bool		paramIsNull;
+
+	for (int slot_index = skip_list[0];
+		 slot_index < max_slots;
+		 slot_index += skip_list[slot_index + 1] + 1)
+	{
+		if (!first_eval_exec)
+		{
+			paramValue = VciExecEvalParamExec_vp(vpnode, econtext, &paramIsNull);
+			first_eval_exec = true;
+		}
+
+		itemValue[slot_index] = paramValue;
+		itemIsNull[slot_index] = paramIsNull;
+	}
+}
+
+static void
+VciVPExecCoerceViaIO(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
+{
+	uint16	   *skip_list = vpnode->skip_list;
+	Datum	   *itemValue = vpnode->itemValue;
+	bool	   *itemIsNull = vpnode->itemIsNull;
+	VciVPNode  *arg_node;
+
+	FmgrInfo   *outfunc = vpnode->data.iocoerce.finfo_out;	/* lookup info for
+															 * source output
+															 * function */
+	FmgrInfo   *infunc = vpnode->data.iocoerce.finfo_in;	/* lookup info for
+															 * result input function */
+	Oid			typioparam = 
vpnode->data.iocoerce.typioparam;
+
+	arg_node = &vpcontext->itemNode[vpnode->arg_items[0]];
+
+	for (int slot_index = skip_list[0];
+		 slot_index < max_slots;
+		 slot_index += skip_list[slot_index + 1] + 1)
+	{
+		char	   *string;
+		Datum		value;
+		bool		isnull;
+
+		value = arg_node->itemValue[slot_index];
+		isnull = arg_node->itemIsNull[slot_index];
+
+		if (isnull)
+			string = NULL;
+		else
+			string = OutputFunctionCall(outfunc, value);
+
+		value = InputFunctionCall(infunc,
+								  string,
+								  typioparam,
+								  -1);
+
+		itemValue[slot_index] = value;
+		itemIsNull[slot_index] = isnull;
+	}
+}
+
+/*
+ * Var placeholder: the node's value/isnull buffers alias the column-store
+ * (or outer result) arrays, set up in vci_add_var_node, so evaluation is a
+ * no-op.
+ */
+static void
+VciVPExecVar(Expr *expression, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
+{
+	/* Do nothing */
+}
+
+/*
+ * Const placeholder: the buffers were pre-filled with the constant in
+ * vci_add_const_node, so evaluation is a no-op.
+ */
+static void
+VciVPExecConst(Expr *expression, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
+{
+	/* Do nothing */
+}
+
+/* Copy the single argument node's values/nulls into this node's buffers. */
+static void
+vci_vp_exec_simple_copy(Expr *expr, VciVPNode *vpnode, VciVPContext *vpcontext, ExprContext *econtext, int max_slots)
+{
+	uint16	   *skip_list = vpnode->skip_list;
+	Datum	   *itemValue = vpnode->itemValue;
+	bool	   *itemIsNull = vpnode->itemIsNull;
+
+	VciVPNode  *arg_node;
+
+	arg_node = &vpcontext->itemNode[vpnode->arg_items[0]];
+
+	for (int slot_index = skip_list[0];
+		 slot_index < max_slots;
+		 slot_index += skip_list[slot_index + 1] + 1)
+	{
+		itemValue[slot_index] = arg_node->itemValue[slot_index];
+		itemIsNull[slot_index] = arg_node->itemIsNull[slot_index];
+	}
+}
+
+/*****************************************************************************
+ * Vector processing setting function
+ *****************************************************************************/
+
+VciVPContext *
+VciBuildVectorProcessing(Expr *node, PlanState *parent, ExprContext *econtext, uint16 *skip_list)
+{
+	VciVPContext *vpcontext;
+	VciVPNode  *lastNode;
+
+	if (node == NULL)
+		return NULL;
+
+	vpcontext = vci_create_vp_context();
+
+	traverse_expr_state_tree(node, parent, econtext, vpcontext, 
skip_list); + + lastNode = &vpcontext->itemNode[vpcontext->num_item - 1]; + + vpcontext->resultValue = lastNode->itemValue; + vpcontext->resultIsNull = lastNode->itemIsNull; + + return vpcontext; +} + +static VciVPContext * +vci_create_vp_context(void) +{ + const vci_vp_item_id max = 16; + VciVPContext *vpcontext; + + vpcontext = palloc0_object(VciVPContext); + + vpcontext->num_item = 1; + vpcontext->max_item = max; + vpcontext->itemNode = palloc0_array(VciVPNode, max); + + return vpcontext; +} + +static vci_vp_item_id +vci_add_vp_node(VciVPExecOp_func evalfunc, Expr *expr, VciVPContext *vpcontext, int len_args, vci_vp_item_id *arg_items, bool allocValueAndIsNull, uint16 *skip_list) +{ + vci_vp_item_id item; + VciVPNode *vpnode; + + item = vpcontext->num_item; + + if (vpcontext->num_item + 1 >= vpcontext->max_item) + { + VciVPNode *oldnodes = vpcontext->itemNode; + VciVPNode *newnodes = palloc0_array(VciVPNode, vpcontext->max_item * 2); + + for (vci_vp_item_id j = 1; j < vpcontext->max_item; j++) + newnodes[j] = oldnodes[j]; + + vpcontext->max_item *= 2; + vpcontext->itemNode = newnodes; + + pfree(oldnodes); + } + + vpnode = &vpcontext->itemNode[item]; + + vpnode->evalfunc = evalfunc; + vpnode->expr = expr; + vpnode->len_args = len_args; + + if (len_args > 0) + { + int i; + + vpnode->arg_items = palloc_array(vci_vp_item_id, len_args); + + for (i = 0; i < len_args; i++) + vpnode->arg_items[i] = arg_items[i]; + } + + if (allocValueAndIsNull) + { + vpnode->itemValue = palloc_array(Datum, VCI_MAX_FETCHING_ROWS); + vpnode->itemIsNull = palloc_array(bool, VCI_MAX_FETCHING_ROWS); + } + + vpnode->skip_list = skip_list; + + vpcontext->num_item++; + + return item; +} + +static vci_vp_item_id +vci_add_var_node(Var *variable, PlanState *parent, VciVPContext *vpcontext, uint16 *skip_list) +{ + vci_vp_item_id ret; + VciVPNode *vpnode; + + VciScanState *scanstate = vci_search_scan_state((VciPlanState *) parent); + + ret = vci_add_vp_node(VciVPExecVar, (Expr *) variable, 
vpcontext, 0, NULL, false, skip_list); + + vpnode = &vpcontext->itemNode[ret]; + + if (variable->varno == OUTER_VAR) + { + vpnode->itemValue = scanstate->result_values[variable->varattno - 1]; + vpnode->itemIsNull = scanstate->result_isnull[variable->varattno - 1]; + } + else + { + int index; + + index = scanstate->attr_map[variable->varattno] - 1; + + Assert(index >= 0); + Assert(index < scanstate->vector_set->num_columns); + + vpnode->itemValue = vci_CSGetValueAddrFromVirtualTuplesColumnwise(scanstate->vector_set, index); + vpnode->itemIsNull = vci_CSGetIsNullAddrFromVirtualTuplesColumnwise(scanstate->vector_set, index); + } + + return ret; +} +static vci_vp_item_id +vci_add_param_node(Param *param, PlanState *parent, VciVPContext *vpcontext, uint16 *skip_list) +{ + vci_vp_item_id ret; + VciVPNode *vpnode; + + ret = vci_add_vp_node((VciVPExecOp_func) VciVPExecParamExec, (Expr *) param, vpcontext, 0, NULL, true, skip_list); + + vpnode = &vpcontext->itemNode[ret]; + + vpnode->data.param.paramid = param->paramid; + vpnode->data.param.paramtype = param->paramtype; + vpnode->data.param.vci_parent_plan = parent->plan; + + return ret; +} +static vci_vp_item_id +vci_add_const_node(Const *con, VciVPContext *vpcontext, uint16 *skip_list) +{ + vci_vp_item_id ret; + VciVPNode *vpnode; + Datum *itemValue; + bool *itemIsNull; + + ret = vci_add_vp_node(VciVPExecConst, (Expr *) con, vpcontext, 0, NULL, true, skip_list); + + vpnode = &vpcontext->itemNode[ret]; + + itemValue = vpnode->itemValue; + itemIsNull = vpnode->itemIsNull; + + for (int i = 0; i < VCI_MAX_FETCHING_ROWS; i++) + { + itemValue[i] = con->constvalue; + itemIsNull[i] = con->constisnull; + } + + return ret; +} +static vci_vp_item_id +vci_add_func_expr_node(Expr *expr, VciVPContext *vpcontext, FuncExprinfo *funcinfo, PlanState *parent, ExprContext *econtext, uint16 *skip_list) +{ + + vci_vp_item_id result; + int i; + int len_args = 0; + vci_vp_item_id *arg_items; + ListCell *l; + + len_args = 
list_length(funcinfo->args); + if (len_args > 0) + arg_items = palloc_array(vci_vp_item_id, len_args); + else + arg_items = NULL; + + i = 0; + foreach(l, funcinfo->args) + { + Expr *arg = (Expr *) lfirst(l); + + arg_items[i] = traverse_expr_state_tree(arg, parent, econtext, vpcontext, skip_list); + i++; + } + + /* + * pgstat_init_function_usage() + */ + + if (pgstat_track_functions <= funcinfo->fcinfo_data->flinfo->fn_stats) + { + switch (list_length(funcinfo->args)) + { + case 0: + result = vci_add_vp_node((VciVPExecOp_func) VciVPExecFunc_arg0, + expr, vpcontext, len_args, arg_items, true, skip_list); + goto func_expr_state_done; + + case 1: + result = vci_add_vp_node((VciVPExecOp_func) VciVPExecFunc_arg1, + expr, vpcontext, len_args, arg_items, true, skip_list); + goto func_expr_state_done; + + case 2: + result = vci_add_vp_node((VciVPExecOp_func) VciVPExecFunc_arg2, + expr, vpcontext, len_args, arg_items, true, skip_list); + goto func_expr_state_done; + + default: + break; + } + } + + result = vci_add_vp_node((VciVPExecOp_func) VciVPExecFunc, + expr, vpcontext, len_args, arg_items, true, skip_list); + +func_expr_state_done: + { + VciVPNode *vpnode; + + vpnode = &vpcontext->itemNode[result]; + vpnode->data.func.finfo = funcinfo->finfo; + vpnode->data.func.fcinfo_data = funcinfo->fcinfo_data; + vpnode->data.func.fn_addr = funcinfo->fn_addr; + vpnode->data.func.nargs = funcinfo->nargs; + + if (arg_items) + pfree(arg_items); + } + return result; +} +static vci_vp_item_id +vci_add_control_nodes(VciVPExecOp_func head_func, VciVPExecOp_func next_func, List *args, + Expr *expr, PlanState *parent, ExprContext *econtext, VciVPContext *vpcontext, uint16 *skip_list) +{ + vci_vp_item_id ret; + ListCell *l; + Datum *itemValue = palloc_array(Datum, VCI_MAX_FETCHING_ROWS); + bool *itemIsNull = palloc_array(bool, VCI_MAX_FETCHING_ROWS); + uint16 *inner_skip_list = palloc_array(uint16, VCI_MAX_SKIP_LIST_SLOTS); + VciVPNode *head_node; + + ret = vci_add_vp_node(head_func, expr, 
vpcontext, 0, NULL, false, inner_skip_list); + + head_node = &vpcontext->itemNode[ret]; + head_node->itemValue = itemValue; + head_node->itemIsNull = itemIsNull; + head_node->data.init.orig_skip_list = skip_list; + + foreach(l, args) + { + Expr *arg = (Expr *) lfirst(l); + vci_vp_item_id next_item; + VciVPNode *next_node; + + next_item = traverse_expr_state_tree(arg, parent, econtext, vpcontext, inner_skip_list); + ret = vci_add_vp_node(next_func, expr, vpcontext, 1, &next_item, false, inner_skip_list); + + next_node = &vpcontext->itemNode[ret]; + next_node->itemValue = itemValue; + next_node->itemIsNull = itemIsNull; + } + + return ret; +} + +static vci_vp_item_id +traverse_expr_state_tree(Expr *node, PlanState *parent, ExprContext *econtext, VciVPContext *vpcontext, uint16 *skip_list) +{ + + if (node == NULL) + return 0; + + /* Guard against stack overflow due to overly complex expressions */ + check_stack_depth(); + + if (IsA(node, List)) + { + int num_args = list_length((List *) node); + + if (num_args > 1) + return vci_add_control_nodes(VciVPExecAnd_head, VciVPExecAnd_nullasfalse_next, (List *) node, + node, parent, econtext, vpcontext, skip_list); + else if (num_args == 1) + return traverse_expr_state_tree(linitial((List *) node), parent, econtext, vpcontext, skip_list); + } + + switch (nodeTag(node)) + { + case T_Var: + + /* + * Assert(state->evalfunc == (ExprStateEvalFunc) + * VciExecEvalScalarVarFromColumnStore); + * + * If execinitexpr for qual is decided to be not needed, then this + * assertion also becomes invalid + */ + + return vci_add_var_node((Var *) node, parent, vpcontext, skip_list); + + case T_Const: + return vci_add_const_node((Const *) node, vpcontext, skip_list); + + case T_Param: + return vci_add_param_node((Param *) node, parent, vpcontext, skip_list); + + /* + * return vci_add_vp_node((VciVPExecOp_func) VciVPExecParamExec, + * node, vpcontext, 0, NULL, true, skip_list); + */ + + case T_Aggref: + /* LCOV_EXCL_START */ + elog(ERROR, "Aggref 
should not be targeted by vector processing"); + node = NULL; + break; + /* LCOV_EXCL_STOP */ + + case T_OpExpr: + { + OpExpr *op = (OpExpr *) node; + FuncExprinfo *funcinfo = palloc0_object(struct FuncExprinfo); /* will be freed as part + * of query context free */ + + VciVPExecInitFunc(node, op->args, op->opfuncid, op->inputcollid, parent, funcinfo); + return vci_add_func_expr_node(node, vpcontext, funcinfo, parent, econtext, skip_list); + } + + case T_FuncExpr: + { + FuncExpr *func = (FuncExpr *) node; + FuncExprinfo *funcinfo = palloc0_object(struct FuncExprinfo); /* will be freed as part + * of query context free */ + + VciVPExecInitFunc(node, func->args, func->funcid, func->inputcollid, parent, funcinfo); + return vci_add_func_expr_node(node, vpcontext, funcinfo, parent, econtext, skip_list); + } + + case T_DistinctExpr: + { + DistinctExpr *op = (DistinctExpr *) node; + FuncExprinfo *funcinfo = palloc0_object(struct FuncExprinfo); /* will be freed as part + * of query context free */ + vci_vp_item_id result; + VciVPNode *vpnode; + + vci_vp_item_id arg_items[2]; + + /* + * Not required as this was the value always set earlier in + * execinitexpr stage + */ + + VciVPExecInitFunc(node, op->args, op->opfuncid, op->inputcollid, parent, funcinfo); + + Assert(list_length(funcinfo->args) == 2); + + arg_items[0] = traverse_expr_state_tree(list_nth(funcinfo->args, 0), parent, econtext, vpcontext, skip_list); + arg_items[1] = traverse_expr_state_tree(list_nth(funcinfo->args, 1), parent, econtext, vpcontext, skip_list); + + result = vci_add_vp_node((VciVPExecOp_func) VciVPExecDistinctExpr, + node, vpcontext, 2, arg_items, true, skip_list); + + vpnode = &vpcontext->itemNode[result]; + vpnode->data.func.finfo = funcinfo->finfo; + vpnode->data.func.fcinfo_data = funcinfo->fcinfo_data; + vpnode->data.func.fn_addr = funcinfo->fn_addr; + vpnode->data.func.nargs = funcinfo->nargs; + + return result; + } + + case T_NullIfExpr: + { + NullIfExpr *op = (NullIfExpr *) node; + 
vci_vp_item_id arg_items[2]; + FuncExprinfo *funcinfo = palloc0_object(struct FuncExprinfo); /* will be freed as part + * of query context free */ + vci_vp_item_id result; + VciVPNode *vpnode; + + /* + * Not required as this was the value always set earlier in + * execinitexpr stage + */ + VciVPExecInitFunc(node, op->args, op->opfuncid, op->inputcollid, parent, funcinfo); + + Assert(list_length(funcinfo->args) == 2); + + arg_items[0] = traverse_expr_state_tree(list_nth(funcinfo->args, 0), parent, econtext, vpcontext, skip_list); + arg_items[1] = traverse_expr_state_tree(list_nth(funcinfo->args, 1), parent, econtext, vpcontext, skip_list); + + result = vci_add_vp_node((VciVPExecOp_func) VciVPExecNullIfExpr, + node, vpcontext, 2, arg_items, true, skip_list); + + vpnode = &vpcontext->itemNode[result]; + vpnode->data.func.finfo = funcinfo->finfo; + vpnode->data.func.fcinfo_data = funcinfo->fcinfo_data; + vpnode->data.func.fn_addr = funcinfo->fn_addr; + vpnode->data.func.nargs = funcinfo->nargs; + + return result; + } + + case T_ScalarArrayOpExpr: + { + ScalarArrayOpExpr *op = (ScalarArrayOpExpr *) node; + vci_vp_item_id arg_items[2]; + FuncExprinfo *funcinfo = palloc0_object(struct FuncExprinfo); /* will be freed as part + * of query context free */ + vci_vp_item_id result; + VciVPNode *vpnode; + + /* + * Not required as this was the value always set earlier in + * execinitexpr stage + */ + VciVPExecInitFunc(node, op->args, op->opfuncid, op->inputcollid, parent, funcinfo); + + Assert(list_length(funcinfo->args) == 2); + + arg_items[0] = traverse_expr_state_tree(list_nth(funcinfo->args, 0), parent, econtext, vpcontext, skip_list); + arg_items[1] = traverse_expr_state_tree(list_nth(funcinfo->args, 1), parent, econtext, vpcontext, skip_list); + + if (OidIsValid(op->hashfuncid)) + { + FmgrInfo *hash_finfo = palloc0_object(FmgrInfo); + FunctionCallInfo hash_fcinfo = palloc0(SizeForFunctionCallInfo(1)); + + fmgr_info(op->hashfuncid, hash_finfo); + fmgr_info_set_expr((Node *) 
node, hash_finfo);
+				InitFunctionCallInfoData(*hash_fcinfo, hash_finfo,
+										 1, op->inputcollid, NULL,
+										 NULL);
+
+				result = vci_add_vp_node((VciVPExecOp_func) VciVPExecHashedScalarArrayOpExpr,
+										 node, vpcontext, 2, arg_items, true, skip_list);
+
+				vpnode = &vpcontext->itemNode[result];
+				vpnode->data.hashedscalararrayop.finfo = funcinfo->finfo;
+				vpnode->data.hashedscalararrayop.fcinfo_data = funcinfo->fcinfo_data;
+				vpnode->data.hashedscalararrayop.fn_addr = funcinfo->fn_addr;
+
+				/*
+				 * Stash the hash support function initialized just above,
+				 * not the equality function's lookup data: hashing the probe
+				 * values with the equality function would build a bogus hash
+				 * table (and leave hash_finfo/hash_fcinfo dead).
+				 */
+				vpnode->data.hashedscalararrayop.hash_finfo = hash_finfo;
+				vpnode->data.hashedscalararrayop.hash_fcinfo_data = hash_fcinfo;
+				vpnode->data.hashedscalararrayop.hash_fn_addr = hash_finfo->fn_addr;
+			}
+			else
+			{
+				result = vci_add_vp_node((VciVPExecOp_func) VciVPExecScalarArrayOpExpr,
+										 node, vpcontext, 2, arg_items, true, skip_list);
+
+				vpnode = &vpcontext->itemNode[result];
+				vpnode->data.scalararrayop.element_type = InvalidOid;
+				vpnode->data.scalararrayop.useOr = op->useOr;
+				vpnode->data.scalararrayop.finfo = funcinfo->finfo;
+				vpnode->data.scalararrayop.fcinfo_data = funcinfo->fcinfo_data;
+				vpnode->data.scalararrayop.fn_addr = funcinfo->fn_addr;
+			}
+
+			return result;
+		}
+
+		case T_RelabelType:
+		{
+			RelabelType *relabel = (RelabelType *) node;
+
+			return traverse_expr_state_tree(relabel->arg, parent, econtext, vpcontext, skip_list);
+		}
+		case T_NullTest:
+		{
+			vci_vp_item_id ret;
+			NullTest   *ntest = (NullTest *) node;
+
+			ret = traverse_expr_state_tree(ntest->arg, parent, econtext, vpcontext, skip_list);
+			return vci_add_vp_node((VciVPExecOp_func) VciVPExecNullTest,
+								   node, vpcontext, 1, &ret, true, skip_list);
+		}
+
+		case T_BooleanTest:
+		{
+			BooleanTest *booltest = (BooleanTest *) node;
+			vci_vp_item_id ret;
+
+			ret = traverse_expr_state_tree(booltest->arg, parent, econtext, vpcontext, skip_list);
+			return vci_add_vp_node((VciVPExecOp_func) VciVPExecBooleanTest,
+								   node, vpcontext, 1, &ret, true, skip_list);
+		}
+
+		case T_BoolExpr:
+		{
+			BoolExpr   *boolexpr = 
(BoolExpr *) node; + vci_vp_item_id arg_item; + + switch (boolexpr->boolop) + { + case AND_EXPR: + return vci_add_control_nodes(VciVPExecAnd_head, VciVPExecAnd_next, boolexpr->args, + node, parent, econtext, vpcontext, skip_list); + break; + + case OR_EXPR: + return vci_add_control_nodes(VciVPExecOr_head, VciVPExecOr_next, boolexpr->args, + node, parent, econtext, vpcontext, skip_list); + break; + + case NOT_EXPR: + arg_item = traverse_expr_state_tree((Expr *) linitial(boolexpr->args), parent, econtext, vpcontext, skip_list); + return vci_add_vp_node((VciVPExecOp_func) VciVPExecNot, + node, vpcontext, 1, &arg_item, true, skip_list); + + default: + /* LCOV_EXCL_START */ + elog(ERROR, "unrecognized boolop: %d", + (int) boolexpr->boolop); + break; + /* LCOV_EXCL_STOP */ + } + } + break; + + case T_MinMaxExpr: + { + MinMaxExpr *minmaxexpr = (MinMaxExpr *) node; + + return vci_add_control_nodes(VciVPExecMinMax_head, VciVPExecMinMax_next, minmaxexpr->args, + node, parent, econtext, vpcontext, skip_list); + } + + case T_CoalesceExpr: + { + CoalesceExpr *cexpr = (CoalesceExpr *) node; + + return vci_add_control_nodes(VciVPExecCoalesce_head, VciVPExecCoalesce_next, cexpr->args, + node, parent, econtext, vpcontext, skip_list); + } + + case T_CoerceViaIO: + { + CoerceViaIO *coerceViaIOexpr = (CoerceViaIO *) node; + vci_vp_item_id ret; + VciVPNode *vpnode; + Oid iofunc; + Oid typioparam; + bool typisvarlena; + FmgrInfo *finfo_out = palloc0_object(struct FmgrInfo); /* will be freed as part + * of query context free */ + FmgrInfo *finfo_in = palloc0_object(struct FmgrInfo); /* will be freed as part + * of query context free */ + + ret = traverse_expr_state_tree(coerceViaIOexpr->arg, parent, econtext, vpcontext, skip_list); + + ret = vci_add_vp_node((VciVPExecOp_func) VciVPExecCoerceViaIO, + node, vpcontext, 1, &ret, true, skip_list); + getTypeOutputInfo(exprType((Node *) coerceViaIOexpr->arg), + &iofunc, &typisvarlena); + fmgr_info(iofunc, finfo_out); + fmgr_info_set_expr((Node 
*) node, finfo_out); + + getTypeInputInfo(coerceViaIOexpr->resulttype, + &iofunc, &typioparam); + fmgr_info(iofunc, finfo_in); + fmgr_info_set_expr((Node *) node, finfo_in); + + vpnode = &vpcontext->itemNode[ret]; + vpnode->data.iocoerce.finfo_out = finfo_out; + vpnode->data.iocoerce.finfo_in = finfo_in; + vpnode->data.iocoerce.typioparam = typioparam; + + return ret; + } + + case T_CaseExpr: + { + CaseExpr *caseExpr = (CaseExpr *) node; + vci_vp_item_id head, + ret = 0, + save_caseValue; + ListCell *lc; + Datum *itemValue = palloc_array(Datum, VCI_MAX_FETCHING_ROWS); + bool *itemIsNull = palloc_array(bool, VCI_MAX_FETCHING_ROWS); + Datum *caseValue = palloc_array(Datum, VCI_MAX_FETCHING_ROWS); + bool *caseIsNull = palloc_array(bool, VCI_MAX_FETCHING_ROWS); + uint16 *case_whole_skip_list = palloc_array(uint16, VCI_MAX_SKIP_LIST_SLOTS); + VciVPNode *arg_node; + + head = vci_add_vp_node(VciVPExecCase_head, node, vpcontext, 0, NULL, false, case_whole_skip_list); + + arg_node = &vpcontext->itemNode[head]; + arg_node->itemValue = caseValue; + arg_node->itemIsNull = caseIsNull; + arg_node->data.init.orig_skip_list = skip_list; + + save_caseValue = vpcontext->caseValue; + vpcontext->caseValue = head; + + if (caseExpr->arg) + { + vci_vp_item_id arg_item; + + arg_item = traverse_expr_state_tree(caseExpr->arg, parent, econtext, vpcontext, case_whole_skip_list); + ret = vci_add_vp_node(VciVPExecCase_arg, node, vpcontext, 1, &arg_item, false, case_whole_skip_list); + arg_node = &vpcontext->itemNode[ret]; + arg_node->itemValue = caseValue; + arg_node->itemIsNull = caseIsNull; + } + + foreach(lc, caseExpr->args) + { + CaseWhen *when = lfirst(lc); + vci_vp_item_id arg_item; + uint16 *each_case_skip_list = palloc_array(uint16, VCI_MAX_SKIP_LIST_SLOTS); + + /* WHEN evaluation */ + + arg_item = traverse_expr_state_tree(when->expr, parent, econtext, vpcontext, case_whole_skip_list); + ret = vci_add_vp_node(VciVPExecCase_cond, node, vpcontext, 1, &arg_item, false, 
each_case_skip_list); + + arg_node = &vpcontext->itemNode[ret]; + arg_node->itemValue = caseValue; + arg_node->itemIsNull = caseIsNull; + arg_node->data.init.orig_skip_list = case_whole_skip_list; + + vpcontext->caseValue = save_caseValue; + + /* THEN evaluation */ + + arg_item = traverse_expr_state_tree(when->result, parent, econtext, vpcontext, each_case_skip_list); + ret = vci_add_vp_node(VciVPExecCase_result, node, vpcontext, 1, &arg_item, false, each_case_skip_list); + + arg_node = &vpcontext->itemNode[ret]; + arg_node->itemValue = itemValue; + arg_node->itemIsNull = itemIsNull; + + save_caseValue = vpcontext->caseValue; + vpcontext->caseValue = head; + } + + vpcontext->caseValue = save_caseValue; + + if (caseExpr->defresult) + { + /* ELSE evaluation */ + vci_vp_item_id arg_item; + vci_vp_item_id save_caseValue_defresult; + + save_caseValue_defresult = vpcontext->caseValue; + + arg_item = traverse_expr_state_tree(caseExpr->defresult, parent, econtext, vpcontext, case_whole_skip_list); + ret = vci_add_vp_node(VciVPExecCase_result, node, vpcontext, 1, &arg_item, false, case_whole_skip_list); + + arg_node = &vpcontext->itemNode[ret]; + arg_node->itemValue = itemValue; + arg_node->itemIsNull = itemIsNull; + + vpcontext->caseValue = save_caseValue_defresult; + } + + Assert(ret > 0); + + return ret; + } + + case T_CaseTestExpr: + { + vci_vp_item_id ret; + VciVPNode *arg_node, + *caseValue_node; + + ret = vci_add_vp_node(VciVPExecCaseTest, node, vpcontext, 0, NULL, false, skip_list); + + caseValue_node = &vpcontext->itemNode[vpcontext->caseValue]; + + arg_node = &vpcontext->itemNode[ret]; + arg_node->itemValue = caseValue_node->itemValue; + arg_node->itemIsNull = caseValue_node->itemIsNull; + + return ret; + } + + case T_List: + case T_TargetEntry: + /* LCOV_EXCL_START */ + Assert(0); + break; + /* LCOV_EXCL_STOP */ + + default: + /* LCOV_EXCL_START */ + elog(ERROR, "unrecognized node type: %s(%d)", + VciGetNodeName(nodeTag(node)), (int) nodeTag(node)); + node = 
NULL; /* keep compiler quiet */ + break; + /* LCOV_EXCL_STOP */ + } + + Assert(0); + return 0; +} +static +void +VciVPExecInitFunc(Expr *node, List *args, Oid funcid, Oid inputcollid, PlanState *parent, FuncExprinfo *funcinfo) +{ + int nargs = list_length(args); + AclResult aclresult; + FmgrInfo *flinfo; + FunctionCallInfo fcinfo; + + /* Check permission to call function */ + aclresult = object_aclcheck(ProcedureRelationId, funcid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FUNCTION, get_func_name(funcid)); + InvokeFunctionExecuteHook(funcid); + + /* + * Safety check on nargs. Under normal circumstances this should never + * fail, as parser should check sooner. But possibly it might fail if + * server has been compiled with FUNC_MAX_ARGS smaller than some functions + * declared in pg_proc? + */ + if (nargs > FUNC_MAX_ARGS) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_ARGUMENTS), + errmsg_plural("cannot pass more than %d argument to a function", + "cannot pass more than %d arguments to a function", + FUNC_MAX_ARGS, + FUNC_MAX_ARGS))); + + /* Allocate function lookup data and parameter workspace for this call */ + funcinfo->finfo = palloc0_object(FmgrInfo); + funcinfo->fcinfo_data = palloc0(SizeForFunctionCallInfo(nargs)); + flinfo = funcinfo->finfo; + fcinfo = funcinfo->fcinfo_data; + + /* Set up the primary fmgr lookup information */ + fmgr_info(funcid, flinfo); + fmgr_info_set_expr((Node *) node, flinfo); + + /* Initialize function call parameter structure too */ + InitFunctionCallInfoData(*fcinfo, flinfo, + nargs, inputcollid, NULL, NULL); + + /* Keep extra copies of this info to save an indirection at runtime */ + funcinfo->fn_addr = flinfo->fn_addr; + funcinfo->nargs = nargs; + + funcinfo->args = args; + funcinfo->funcid = funcid; + funcinfo->inputcollid = inputcollid; + + Assert(!flinfo->fn_retset); + +} + +Datum +VciExecEvalParamExec_vp(VciVPNode *vpnode, ExprContext *econtext, + bool *isNull) +{ + int 
thisParamId = vpnode->data.param.paramid; + ParamExecData *prm; + + /* + * PARAM_EXEC params (internal executor parameters) are stored in the + * ecxt_param_exec_vals array, and can be accessed by array index. + */ + prm = &(econtext->ecxt_param_exec_vals[thisParamId]); + + if (prm->execPlan != NULL) + { + /* Parameter not evaluated yet, so go do it */ + ExecSetParamPlan(prm->execPlan, econtext); + /* ExecSetParamPlan should have processed this param... */ + Assert(prm->execPlan == NULL); + } + + *isNull = prm->isnull; + return prm->value; +} diff --git a/contrib/vci/include/vci_aggref.h b/contrib/vci/include/vci_aggref.h new file mode 100644 index 0000000..f2b3ad7 --- /dev/null +++ b/contrib/vci/include/vci_aggref.h @@ -0,0 +1,227 @@ +/*------------------------------------------------------------------------- + * + * vci_aggref.h + * Definitions and declarations about VCI Aggref + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/include/vci_aggref.h + * + *------------------------------------------------------------------------- + */ +#ifndef VCI_AGGREF_H +#define VCI_AGGREF_H + +#include "postgres.h" + +#include "access/attnum.h" +#include "access/tupdesc.h" +#include "executor/tuptable.h" +#include "fmgr.h" +#include "nodes/execnodes.h" +#include "nodes/primnodes.h" +#include "utils/tuplesort.h" + +#include "vci_executor.h" + +/** + * AggStatePerAggData - per-aggregate working state for the Agg scan + * + * copied from src/backend/executor/nodeAgg.c + */ +typedef struct VciAggStatePerAggData +{ + /* + * These values are set up during ExecInitAgg() and do not change + * thereafter: + */ + + /* Links to Aggref expr and state nodes this working state is for */ + Aggref *aggref; + + /* + * Nominal number of arguments for aggregate function. For plain aggs, + * this excludes any ORDER BY expressions. For ordered-set aggs, this + * counts both the direct and aggregated (ORDER BY) arguments. 
+ */ + int numArguments; + + /* + * Number of aggregated input columns. This includes ORDER BY expressions + * in both the plain-agg and ordered-set cases. Ordered-set direct args + * are not counted, though. + */ + int numInputs; + + /* + * Number of aggregated input columns to pass to the transfn. This + * includes the ORDER BY columns for ordered-set aggs, but not for plain + * aggs. (This doesn't count the transition state value!) + */ + int numTransInputs; + + /* + * Number of arguments to pass to the finalfn. This is always at least 1 + * (the transition state value) plus any ordered-set direct args. If the + * finalfn wants extra args then we pass nulls corresponding to the + * aggregated input columns. + */ + int numFinalArgs; + + /* Oids of transfer functions */ + Oid transfn_oid; + Oid finalfn_oid; /* may be InvalidOid */ + + /* + * fmgr lookup data for transfer functions --- only valid when + * corresponding oid is not InvalidOid. Note in particular that fn_strict + * flags are kept here. + */ + FmgrInfo transfn; + FmgrInfo finalfn; + + /* Input collation derived for aggregate */ + Oid aggCollation; + + /* number of sorting columns */ + int numSortCols; + + /* number of sorting columns to consider in DISTINCT comparisons */ + /* (this is either zero or the same as numSortCols) */ + int numDistinctCols; + + /* deconstructed sorting information (arrays of length numSortCols) */ + AttrNumber *sortColIdx; + Oid *sortOperators; + Oid *sortCollations; + bool *sortNullsFirst; + + /* + * fmgr lookup data for input columns' equality operators --- only + * set/used when aggregate has DISTINCT flag. Note that these are in + * order of sort column index, not parameter index. 
+ */ + FmgrInfo *equalfns; /* array of length numDistinctCols */ + + /* + * initial value from pg_aggregate entry + */ + Datum initValue; + bool initValueIsNull; + + /* + * We need the len and byval info for the agg's input, result, and + * transition data types in order to know how to copy/delete values. + * + * Note that the info for the input type is used only when handling + * DISTINCT aggs with just one argument, so there is only one input type. + */ + int16 inputtypeLen, + resulttypeLen, + transtypeLen; + bool inputtypeByVal, + resulttypeByVal, + transtypeByVal; + + /* + * Stuff for evaluation of inputs. We used to just use ExecEvalExpr, but + * with the addition of ORDER BY we now need at least a slot for passing + * data to the sort object, which requires a tupledesc, so we might as + * well go whole hog and use ExecProject too. + */ + TupleDesc evaldesc; /* descriptor of input tuples */ + VciProjectionInfo *evalproj; /* projection machinery */ + + /* + * Slots for holding the evaluated input arguments. These are set up + * during ExecInitAgg() and then used for each input row. + */ + TupleTableSlot *evalslot; /* current input tuple */ + TupleTableSlot *uniqslot; /* used for multi-column DISTINCT */ + + /* + * These values are working state that is initialized at the start of an + * input tuple group and updated for each input tuple. + * + * For a simple (non DISTINCT/ORDER BY) aggregate, we just feed the input + * values straight to the transition function. If it's DISTINCT or + * requires ORDER BY, we pass the input values into a Tuplesort object; + * then at completion of the input tuple group, we scan the sorted values, + * eliminate duplicates if needed, and run the transition function on the + * rest. + */ + + Tuplesortstate *sortstate; /* sort object, if DISTINCT or ORDER BY */ + + /* + * This field is a pre-initialized FunctionCallInfo struct used for + * calling this aggregate's transfn. 
We save a few cycles per row by not + * re-initializing the unchanging fields; which isn't much, but it seems + * worth the extra space consumption. + */ + FunctionCallInfo transfn_fcinfo; + + /*----------------------------------------------------------------------*/ + /* Definitions above must be the same as AggStatePerAggData */ + /*----------------------------------------------------------------------*/ + + VciAdvanceAggref_Func advance_aggref; /* advance aggregation function */ + + Datum (*copy_trans) (Datum, bool, int); /* transition data copy + * function */ + FmgrInfo merge_transfn; /* function information for merging transition + * data */ + FmgrInfo send_transfn; /* function information for converting + * transition data to binary */ + FmgrInfo recv_transfn; /* function information for converting + * transition data from binary */ + + Oid recv_trans_typioparam; /* information to be passed as + * argument when recv_transfn is + * called */ + + FunctionCallInfo merge_trans_fcinfo; /* datastruct needed to call + * function via merge_transfn */ + FunctionCallInfo send_trans_fcinfo; /* datastruct needed to call function + * via send_trans */ + FunctionCallInfo recv_trans_fcinfo; /* datastruct needed to call function + * via recv_transfn */ + +} VciAggStatePerAggData; + +/** + * AggStatePerGroupData - per-aggregate-per-group working state + * + * These values are working state that is initialized at the start of + * an input tuple group and updated for each input tuple. + * + * In AGG_PLAIN and AGG_SORTED modes, we have a single array of these + * structs (pointed to by aggstate->pergroup); we re-use the array for + * each input group, if it's AGG_SORTED mode. In AGG_HASHED mode, the + * hash table contains an array of these structs for each tuple group. 
+ * + * Logically, the sortstate field belongs in this struct, but we do not + * keep it here for space reasons: we don't support DISTINCT aggregates + * in AGG_HASHED mode, so there's no reason to use up a pointer field + * in every entry of the hashtable. + * + * copied from src/backend/executor/nodeAgg.c + */ +typedef struct VciAggStatePerGroupData +{ + Datum transValue; /* current transition value */ + bool transValueIsNull; + + bool noTransValue; /* true if transValue not set yet */ + + /* + * Note: noTransValue initially has the same value as transValueIsNull, + * and if true both are cleared to false at the same time. They are not + * the same though: if transfn later returns a NULL, we want to keep that + * NULL and not auto-replace it with a later input value. Only the first + * non-NULL input will be auto-substituted. + */ +} VciAggStatePerGroupData; + +#endif /* VCI_AGGREF_H */ diff --git a/contrib/vci/include/vci_aggref_impl.inc b/contrib/vci/include/vci_aggref_impl.inc new file mode 100644 index 0000000..bf9de0a --- /dev/null +++ b/contrib/vci/include/vci_aggref_impl.inc @@ -0,0 +1,873 @@ +/*------------------------------------------------------------------------- + * + * vci_aggref_impl.inc + * Templates for specialized advance_aggref functions + * + * This file is included by vci_aggref.c. 
This template can be used like: + * + * #define VCI_ADVANCE_AGGREF_FUNC aggref_0input_default + * #include "vci_aggref_impl.inc" + * #undef VCI_ADVANCE_AGGREF_FUNC + * + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/include/vci_aggref_impl.inc + * + *------------------------------------------------------------------------- + */ + +#include "utils/float.h" +#include "datatype/timestamp.h" + +#include "vci_executor.h" + +static void +VCI_ADVANCE_AGGREF_FUNC(VciAggState *aggstate, + int aggno, + VciAggStatePerGroup *entries, + int max_slots) +{ + MemoryContext oldContext; + int slot_index; + VciScanState *scanstate; + uint16 *skip_list; + +#if VCI_TRANS_INPUTS_ARG == VCI_TRANS_INPUTS_0 + + /* + * aggref_0input_int8inc does not use these variables, skip the + * declaration. + */ +#if VCI_TRANFN_OID != F_INT8INC + Datum *inputValues = NULL; + bool *inputIsNulls = NULL; + VciAggStatePerAgg peraggstate = &aggstate->peragg[aggno]; +#endif /* VCI_TRANFN_OID */ + +#elif VCI_TRANS_INPUTS_ARG == VCI_TRANS_INPUTS_1_SIMPLEVAR + Datum *inputValues = NULL; + bool *inputIsNulls = NULL; + VciAggStatePerAgg peraggstate = &aggstate->peragg[aggno]; + VciProjectionInfo *projInfo; + int attno; + +#elif VCI_TRANS_INPUTS_ARG == VCI_TRANS_INPUTS_1_EVALEXPR + Datum *inputValues = NULL; + bool *inputIsNulls = NULL; + VciAggStatePerAgg peraggstate = &aggstate->peragg[aggno]; + VciProjectionInfo *projInfo; + ExprContext *econtext; + VciVPContext *vpcontext; +#endif /* SELECT VCI_TRANS_INPUTS_ARG */ + + scanstate = (VciScanState *) outerPlanState(aggstate); + Assert(scanstate->vci.css.ss.ps.type == T_CustomScanState); + skip_list = vci_CSGetSkipFromVirtualTuples(scanstate->vector_set); + +#if VCI_TRANS_INPUTS_ARG == VCI_TRANS_INPUTS_0 + +#elif VCI_TRANS_INPUTS_ARG == VCI_TRANS_INPUTS_1_SIMPLEVAR + projInfo = peraggstate->evalproj; + + attno = projInfo->pi_varNumbers[0]; + + inputValues = scanstate->result_values[attno - 1]; + 
inputIsNulls = scanstate->result_isnull[attno - 1]; + +#elif VCI_TRANS_INPUTS_ARG == VCI_TRANS_INPUTS_1_EVALEXPR + + projInfo = peraggstate->evalproj; + econtext = projInfo->pi_exprContext; + + vpcontext = projInfo->pi_vp_tle_array[0]; + + oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); + VciExecEvalVectorProcessing(vpcontext, econtext, max_slots); + MemoryContextSwitchTo(oldContext); + + inputValues = vpcontext->resultValue; + inputIsNulls = vpcontext->resultIsNull; + +#endif /* SELECT VCI_TRANS_INPUTS_ARG */ + + for (slot_index = skip_list[0]; slot_index < max_slots; slot_index += skip_list[slot_index + 1] + 1) + { + VciAggStatePerGroup pergroupstate; + Datum newVal; + bool newIsNull; + + pergroupstate = &(entries[slot_index])[aggno]; + + if (VCI_TRANS_FN_STRICT) /* peraggstate->transfn.fn_strict or 1 or + * 0 */ + { + /* + * For a strict transfn, nothing happens when there's a NULL + * input; we just keep the prior transValue. + */ +#if (VCI_TRANS_INPUTS_ARG == VCI_TRANS_INPUTS_1_SIMPLEVAR) || (VCI_TRANS_INPUTS_ARG == VCI_TRANS_INPUTS_1_EVALEXPR) + if (inputIsNulls[slot_index]) + continue; +#endif + + if (pergroupstate->noTransValue) + { + /* + * transValue has not been initialized. This is the first + * non-NULL input value. We use it as the initial value for + * transValue. (We already checked that the agg's input type + * is binary-compatible with its transtype, so straight copy + * here is OK.) + * + * We must copy the datum into aggcontext if it is + * pass-by-ref. We do not need to pfree the old transValue, + * since it's NULL. 
+ */ + oldContext = MemoryContextSwitchTo(aggstate->aggcontext); +#if VCI_TRANS_INPUTS_ARG == VCI_TRANS_INPUTS_0 + pergroupstate->transValue = 0; +#elif VCI_TRANS_TYPE_BYVAL <= 0 + pergroupstate->transValue = datumCopy(inputValues[slot_index], + peraggstate->transtypeByVal, + peraggstate->transtypeLen); +#else + pergroupstate->transValue = inputValues[slot_index]; +#endif + pergroupstate->transValueIsNull = false; + pergroupstate->noTransValue = false; + MemoryContextSwitchTo(oldContext); + continue; + } + if (pergroupstate->transValueIsNull) + { + /* + * Don't call a strict function with NULL inputs. Note it is + * possible to get here despite the above tests, if the + * transfn is strict *and* returned a NULL on a prior cycle. + * If that happens we will propagate the NULL all the way to + * the end. + */ + continue; + } + } + +#if VCI_TRANS_TYPE_BYVAL <= 0 + /* We run the transition functions in per-input-tuple memory context */ + oldContext = MemoryContextSwitchTo(aggstate->tmpcontext->ecxt_per_tuple_memory); +#endif + +#ifdef VCI_TRANS_USE_CURPERAGG + /* set up aggstate->curperagg for AggGetAggref() */ + aggstate->pseudo_aggstate->curperagg = (AggStatePerAgg) peraggstate; /* @remark */ +#endif + +#if VCI_TRANFN_OID == F_FLOAT4_ACCUM /* 208 */ + /* float4_accum */ + { + ArrayType *transarray = DatumGetArrayTypeP(pergroupstate->transValue); + + float8 newval = DatumGetFloat4(inputValues[slot_index]); + float8 *transvalues; + float8 N, + Sx, + Sxx, + tmp; + + transvalues = check_float8_array(transarray, "float4_accum", 3); + N = transvalues[0]; + Sx = transvalues[1]; + Sxx = transvalues[2]; + + /* + * Use the Youngs-Cramer algorithm to incorporate the new value + * into the transition values. + */ + + N += 1.0; + Sx += newval; + if (transvalues[0] > 0.0) + { + tmp = newval * N - Sx; + Sxx += tmp * tmp / (N * transvalues[0]); + + /* + * Overflow check. We only report an overflow error when + * finite inputs lead to infinite results. 
Note also that Sxx + * should be NaN if any of the inputs are infinite, so we + * intentionally prevent Sxx from becoming infinite. + */ + if (isinf(Sx) || isinf(Sxx)) + { + if (!isinf(transvalues[1]) && !isinf(newval)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value out of range: overflow"))); + + Sxx = get_float8_nan(); + } + } + + transvalues[0] = N; + transvalues[1] = Sx; + transvalues[2] = Sxx; + + newVal = pergroupstate->transValue; + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_FLOAT4PL /* 204 */ + /* float4pl */ + { + float4 arg1 = DatumGetFloat4(pergroupstate->transValue); + float4 arg2 = DatumGetFloat4(inputValues[slot_index]); + float4 result; + + result = arg1 + arg2; + + CHECKFLOATVAL(result, isinf(arg1) || isinf(arg2), true); + newVal = Float4GetDatum(result); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_FLOAT4LARGER /* 209 */ + /* float4larger */ + { + float4 arg1 = DatumGetFloat4(pergroupstate->transValue); + float4 arg2 = DatumGetFloat4(inputValues[slot_index]); + float4 result; + + if (float4_gt(arg1, arg2)) + result = arg1; + else + result = arg2; + newVal = Float4GetDatum(result); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_FLOAT4SMALLER /* 211 */ + /* float4smaller */ + { + float4 arg1 = DatumGetFloat4(pergroupstate->transValue); + float4 arg2 = DatumGetFloat4(inputValues[slot_index]); + float4 result; + + if (float4_lt(arg1, arg2)) + result = arg1; + else + result = arg2; + newVal = Float4GetDatum(result); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_FLOAT8PL /* 218 */ + /* float8pl */ + { + float8 arg1 = DatumGetFloat8(pergroupstate->transValue); + float8 arg2 = DatumGetFloat8(inputValues[slot_index]); + float8 result; + + result = arg1 + arg2; + + CHECKFLOATVAL(result, isinf(arg1) || isinf(arg2), true); + newVal = Float8GetDatum(result); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_INT4LARGER /* 768 */ + /* int4larger */ + { + int32 arg1 = 
DatumGetInt32(pergroupstate->transValue); + int32 arg2 = DatumGetInt32(inputValues[slot_index]); + + newVal = Int32GetDatum((arg1 > arg2) ? arg1 : arg2); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_INT4SMALLER /* 769 */ + /* int4smaller */ + { + int32 arg1 = DatumGetInt32(pergroupstate->transValue); + int32 arg2 = DatumGetInt32(inputValues[slot_index]); + + newVal = Int32GetDatum((arg1 < arg2) ? arg1 : arg2); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_CASH_PL /* 894 */ + /* cash_pl */ + { + Cash c1 = DatumGetCash(pergroupstate->transValue); + Cash c2 = DatumGetCash(inputValues[slot_index]); + Cash result; + + result = c1 + c2; + + newVal = CashGetDatum(result); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_CASHLARGER /* 898 */ + /* cashlarger */ + { + Cash c1 = DatumGetCash(pergroupstate->transValue); + Cash c2 = DatumGetCash(inputValues[slot_index]); + Cash result; + + result = (c1 > c2) ? c1 : c2; + + newVal = CashGetDatum(result); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_CASHSMALLER /* 899 */ + /* cashsmaller */ + { + Cash c1 = DatumGetCash(pergroupstate->transValue); + Cash c2 = DatumGetCash(inputValues[slot_index]); + Cash result; + + result = (c1 < c2) ? c1 : c2; + + newVal = CashGetDatum(result); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_DATE_LARGER /* 1138 */ + /* date_larger */ + { + DateADT dateVal1 = DatumGetDateADT(pergroupstate->transValue); + DateADT dateVal2 = DatumGetDateADT(inputValues[slot_index]); + + newVal = DateADTGetDatum((dateVal1 > dateVal2) ? dateVal1 : dateVal2); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_DATE_SMALLER /* 1139 */ + /* date_smaller */ + { + DateADT dateVal1 = DatumGetDateADT(pergroupstate->transValue); + DateADT dateVal2 = DatumGetDateADT(inputValues[slot_index]); + + newVal = DateADTGetDatum((dateVal1 < dateVal2) ? 
dateVal1 : dateVal2); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_INTERVAL_PL /* 1169 */ + /* interval_pl */ + { + Interval *span1 = DatumGetIntervalP(pergroupstate->transValue); + Interval *span2 = DatumGetIntervalP(inputValues[slot_index]); + Interval *result; + + result = palloc_object(Interval); + + /* + * Handle infinities. + * + * We treat anything that amounts to "infinity - infinity" as an + * error, since the interval type has nothing equivalent to NaN. + */ + if (INTERVAL_IS_NOBEGIN(span1)) + { + if (INTERVAL_IS_NOEND(span2)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + else + INTERVAL_NOBEGIN(result); + } + else if (INTERVAL_IS_NOEND(span1)) + { + if (INTERVAL_IS_NOBEGIN(span2)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("interval out of range"))); + else + INTERVAL_NOEND(result); + } + else if (INTERVAL_NOT_FINITE(span2)) + memcpy(result, span2, sizeof(Interval)); + else + finite_interval_pl(span1, span2, result); + + newVal = IntervalPGetDatum(result); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_TIMESTAMP_SMALLER /* 1195 */ + /* timestamp_smaller */ + { + Timestamp dt1 = DatumGetTimestamp(pergroupstate->transValue); + Timestamp dt2 = DatumGetTimestamp(inputValues[slot_index]); + Timestamp result; + + /* + * use timestamp_cmp_internal to be sure this agrees with + * comparisons + */ + if (timestamp_cmp_internal(dt1, dt2) < 0) + result = dt1; + else + result = dt2; + newVal = TimestampGetDatum(result); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_TIMESTAMP_LARGER /* 1196 */ + /* timestamp_larger */ + { + Timestamp dt1 = DatumGetTimestamp(pergroupstate->transValue); + Timestamp dt2 = DatumGetTimestamp(inputValues[slot_index]); + Timestamp result; + + if (timestamp_cmp_internal(dt1, dt2) > 0) + result = dt1; + else + result = dt2; + newVal = TimestampGetDatum(result); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_INTERVAL_SMALLER 
/* 1197 */ + /* interval_smaller */ + { + Interval *interval1 = DatumGetIntervalP(pergroupstate->transValue); + Interval *interval2 = DatumGetIntervalP(inputValues[slot_index]); + Interval *result; + + /* + * use interval_cmp_internal to be sure this agrees with + * comparisons + */ + if (interval_cmp_internal(interval1, interval2) < 0) + result = interval1; + else + result = interval2; + newVal = IntervalPGetDatum(result); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_INTERVAL_LARGER /* 1198 */ + /* interval_larger */ + { + Interval *interval1 = DatumGetIntervalP(pergroupstate->transValue); + Interval *interval2 = DatumGetIntervalP(inputValues[slot_index]); + Interval *result; + + if (interval_cmp_internal(interval1, interval2) > 0) + result = interval1; + else + result = interval2; + newVal = IntervalPGetDatum(result); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_INT8INC /* 1219 */ + { + newVal = Int64GetDatum(DatumGetInt64(pergroupstate->transValue) + 1); + + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_INT8INC_ANY /* 2804 */ + /* Mostly same as F_INT8INC, but NULL-check for arguments is done */ + { + newVal = Int64GetDatum(DatumGetInt64(pergroupstate->transValue) + 1); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_TIME_LARGER /* 1377 */ + /* time_larger */ + { + TimeADT time1 = DatumGetTimeADT(pergroupstate->transValue); + TimeADT time2 = DatumGetTimeADT(inputValues[slot_index]); + + newVal = TimeADTGetDatum((time1 > time2) ? time1 : time2); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_TIME_SMALLER /* 1378 */ + /* time_smaller */ + { + TimeADT time1 = DatumGetTimeADT(pergroupstate->transValue); + TimeADT time2 = DatumGetTimeADT(inputValues[slot_index]); + + newVal = TimeADTGetDatum((time1 < time2) ? 
time1 : time2); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_TIMETZ_LARGER /* 1379 */ + /* timetz_larger */ + { + TimeTzADT *time1 = DatumGetTimeTzADTP(pergroupstate->transValue); + TimeTzADT *time2 = DatumGetTimeTzADTP(inputValues[slot_index]); + TimeTzADT *result; + + if (timetz_cmp_internal(time1, time2) > 0) + result = time1; + else + result = time2; + newVal = TimeTzADTPGetDatum(result); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_TIMETZ_SMALLER /* 1380 */ + /* timetz_smaller */ + { + TimeTzADT *time1 = DatumGetTimeTzADTP(pergroupstate->transValue); + TimeTzADT *time2 = DatumGetTimeTzADTP(inputValues[slot_index]); + TimeTzADT *result; + + if (timetz_cmp_internal(time1, time2) < 0) + result = time1; + else + result = time2; + newVal = TimeTzADTPGetDatum(result); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_INT2_SUM /* 1840 */ + /* int2_sum */ + { + int64 newval; + + newIsNull = false; + if (pergroupstate->transValueIsNull) + { + if (inputIsNulls[slot_index]) + { + newval = 0; + newIsNull = true; + } + else + newval = (int64) DatumGetInt16(inputValues[slot_index]); + } + else + { + int64 oldsum = DatumGetInt64(pergroupstate->transValue); + + if (inputIsNulls[slot_index]) + newval = oldsum; + else + newval = oldsum + (int64) DatumGetInt16(inputValues[slot_index]); + } + newVal = Int64GetDatum(newval); + } + +#elif VCI_TRANFN_OID == F_INT4_SUM /* 1841 */ + /* int4_sum */ + { + int64 newval; + + newIsNull = false; + if (pergroupstate->transValueIsNull) + { + if (inputIsNulls[slot_index]) + { + newval = 0; + newIsNull = true; + } + else + newval = (int64) DatumGetInt32(inputValues[slot_index]); + } + else + { + int64 oldsum = DatumGetInt64(pergroupstate->transValue); + + if (inputIsNulls[slot_index]) + newval = oldsum; + else + newval = oldsum + (int64) DatumGetInt32(inputValues[slot_index]); + } + newVal = Int64GetDatum(newval); + } + +#elif VCI_TRANFN_OID == F_INT4AND /* 1898 */ + /* int4and */ + { + int32 arg1 = 
DatumGetInt32(pergroupstate->transValue); + int32 arg2 = DatumGetInt32(inputValues[slot_index]); + + newVal = Int32GetDatum(arg1 & arg2); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_INT4OR /* 1899 */ + /* int4or */ + { + int32 arg1 = DatumGetInt32(pergroupstate->transValue); + int32 arg2 = DatumGetInt32(inputValues[slot_index]); + + newVal = Int32GetDatum(arg1 | arg2); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_INT4_AVG_ACCUM /* 1963 */ + /* int4_avg_accum */ + { + ArrayType *transarray = DatumGetArrayTypeP(pergroupstate->transValue); + int32 newval = DatumGetInt32(inputValues[slot_index]); + Int8TransTypeData *transdata; + + if (ARR_HASNULL(transarray) || + ARR_SIZE(transarray) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData)) + elog(ERROR, "expected 2-element int8 array"); + + transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray); + transdata->count++; + transdata->sum += newval; + + newVal = pergroupstate->transValue; + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_BOOLAND_STATEFUNC /* 2515 */ + /* booland_statefunc */ + { + newVal = BoolGetDatum( + DatumGetBool(pergroupstate->transValue) && DatumGetBool(inputValues[slot_index])); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_BOOLOR_STATEFUNC /* 2516 */ + /* boolor_statefunc */ + { + newVal = BoolGetDatum( + DatumGetBool(pergroupstate->transValue) || DatumGetBool(inputValues[slot_index])); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_INT2LARGER /* 770 */ + /* int2larger */ + { + int16 arg1 = DatumGetInt16(pergroupstate->transValue); + int16 arg2 = DatumGetInt16(inputValues[slot_index]); + + newVal = Int16GetDatum((arg1 > arg2) ? arg1 : arg2); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_INT2SMALLER /* 771 */ + /* int2smaller */ + { + int16 arg1 = DatumGetInt16(pergroupstate->transValue); + int16 arg2 = DatumGetInt16(inputValues[slot_index]); + + newVal = Int16GetDatum((arg1 < arg2) ? 
arg1 : arg2); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_INT2AND /* 1892 */ + /* int2and */ + { + int16 arg1 = DatumGetInt16(pergroupstate->transValue); + int16 arg2 = DatumGetInt16(inputValues[slot_index]); + + newVal = Int16GetDatum(arg1 & arg2); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_INT2OR /* 1893 */ + /* int2or */ + { + int16 arg1 = DatumGetInt16(pergroupstate->transValue); + int16 arg2 = DatumGetInt16(inputValues[slot_index]); + + newVal = Int16GetDatum(arg1 | arg2); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_INT2_AVG_ACCUM /* 1962 */ + /* int2_avg_accum */ + { + ArrayType *transarray = DatumGetArrayTypeP(pergroupstate->transValue); + int16 newval = DatumGetInt16(inputValues[slot_index]); + Int8TransTypeData *transdata; + + if (ARR_HASNULL(transarray) || + ARR_SIZE(transarray) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData)) + elog(ERROR, "expected 2-element int8 array"); + + transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray); + transdata->count++; + transdata->sum += newval; + + newVal = pergroupstate->transValue; + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_INT8LARGER /* 1236 */ + /* int8larger */ + { + int64 arg1 = DatumGetInt64(pergroupstate->transValue); + int64 arg2 = DatumGetInt64(inputValues[slot_index]); + + newVal = Int64GetDatum((arg1 > arg2) ? arg1 : arg2); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_INT8SMALLER /* 1237 */ + /* int8smaller */ + { + int64 arg1 = DatumGetInt64(pergroupstate->transValue); + int64 arg2 = DatumGetInt64(inputValues[slot_index]); + + newVal = Int64GetDatum((arg1 < arg2) ? 
arg1 : arg2); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_INT8AND /* 1904 */ + /* int8and */ + { + int64 arg1 = DatumGetInt64(pergroupstate->transValue); + int64 arg2 = DatumGetInt64(inputValues[slot_index]); + + newVal = Int64GetDatum(arg1 & arg2); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_INT8OR /* 1905 */ + /* int8or */ + { + int64 arg1 = DatumGetInt64(pergroupstate->transValue); + int64 arg2 = DatumGetInt64(inputValues[slot_index]); + + newVal = Int64GetDatum(arg1 | arg2); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_FLOAT8_ACCUM /* 222 */ + /* float8_accum */ + { + ArrayType *transarray = DatumGetArrayTypeP(pergroupstate->transValue); + + float8 newval = DatumGetFloat8(inputValues[slot_index]); + float8 *transvalues; + float8 N, + Sx, + Sxx, + tmp; + + transvalues = check_float8_array(transarray, "float8_accum", 3); + N = transvalues[0]; + Sx = transvalues[1]; + Sxx = transvalues[2]; + + N += 1.0; + Sx += newval; + if (transvalues[0] > 0.0) + { + tmp = newval * N - Sx; + Sxx += tmp * tmp / (N * transvalues[0]); + + /* + * Overflow check. We only report an overflow error when + * finite inputs lead to infinite results. Note also that Sxx + * should be NaN if any of the inputs are infinite, so we + * intentionally prevent Sxx from becoming infinite. 
+ */ + if (isinf(Sx) || isinf(Sxx)) + { + if (!isinf(transvalues[1]) && !isinf(newval)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value out of range: overflow"))); + + Sxx = get_float8_nan(); + } + } + + transvalues[0] = N; + transvalues[1] = Sx; + transvalues[2] = Sxx; + + newVal = pergroupstate->transValue; + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_FLOAT8LARGER /* 223 */ + /* float8larger */ + { + float8 arg1 = DatumGetFloat8(pergroupstate->transValue); + float8 arg2 = DatumGetFloat8(inputValues[slot_index]); + float8 result; + + if (float8_cmp_internal(arg1, arg2) > 0) + result = arg1; + else + result = arg2; + newVal = Float8GetDatum(result); + newIsNull = false; + } + +#elif VCI_TRANFN_OID == F_FLOAT8SMALLER /* 224 */ + /* float8smaller */ + { + float8 arg1 = DatumGetFloat8(pergroupstate->transValue); + float8 arg2 = DatumGetFloat8(inputValues[slot_index]); + float8 result; + + if (float8_cmp_internal(arg1, arg2) < 0) + result = arg1; + else + result = arg2; + newVal = Float8GetDatum(result); + newIsNull = false; + } + +#else /* default */ + { + FunctionCallInfo fcinfo = peraggstate->transfn_fcinfo; + + fcinfo->args[0].value = pergroupstate->transValue; + fcinfo->args[0].isnull = pergroupstate->transValueIsNull; + fcinfo->args[1].value = inputValues[slot_index]; + fcinfo->args[1].isnull = inputIsNulls[slot_index]; + fcinfo->isnull = false; + newVal = FunctionCallInvoke(fcinfo); + newIsNull = fcinfo->isnull; + } +#endif + +#ifdef VCI_TRANS_USE_CURPERAGG + aggstate->pseudo_aggstate->curperagg = NULL; +#endif + +#if VCI_TRANS_TYPE_BYVAL == -1 + if (!peraggstate->transtypeByVal && + DatumGetPointer(newVal) != DatumGetPointer(pergroupstate->transValue)) + { + if (!newIsNull) + { + MemoryContextSwitchTo(aggstate->aggcontext); + newVal = datumCopy(newVal, + peraggstate->transtypeByVal, + peraggstate->transtypeLen); + } + if (!pergroupstate->transValueIsNull) + pfree(DatumGetPointer(pergroupstate->transValue)); + } +#elif 
VCI_TRANS_TYPE_BYVAL == 0 + if (DatumGetPointer(newVal) != DatumGetPointer(pergroupstate->transValue)) + { + if (!newIsNull) + { + MemoryContextSwitchTo(aggstate->aggcontext); + newVal = datumCopy(newVal, + peraggstate->transtypeByVal, + peraggstate->transtypeLen); + } + if (!pergroupstate->transValueIsNull) + pfree(DatumGetPointer(pergroupstate->transValue)); + } +#endif + + pergroupstate->transValue = newVal; + pergroupstate->transValueIsNull = newIsNull; + +#if VCI_TRANS_TYPE_BYVAL <= 0 + MemoryContextSwitchTo(oldContext); +#endif + } +} diff --git a/contrib/vci/include/vci_executor.h b/contrib/vci/include/vci_executor.h new file mode 100644 index 0000000..481f80a --- /dev/null +++ b/contrib/vci/include/vci_executor.h @@ -0,0 +1,895 @@ +/*------------------------------------------------------------------------- + * vci_executor.h + * Definitions and declarations about executor modules + * + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/include/vci_executor.h + * + *------------------------------------------------------------------------- + */ +#ifndef VCI_EXECUTOR_H +#define VCI_EXECUTOR_H + +#include "postgres.h" + +#include "access/htup.h" +#include "access/tupdesc.h" +#include "executor/execdesc.h" +#include "executor/execExpr.h" +#include "executor/instrument.h" +#include "nodes/bitmapset.h" +#include "nodes/execnodes.h" +#include "nodes/extensible.h" +#include "nodes/nodes.h" +#include "nodes/parsenodes.h" +#include "nodes/plannodes.h" +#include "nodes/pathnodes.h" +#include "storage/buffile.h" + +#include "vci_fetch.h" + +struct VciAgg; +struct VciAggState; + +/* + * MemoryContext size used during query execution + */ +#define VCI_ALLOCSET_DEFAULT_MINSIZE (0) +#define VCI_ALLOCSET_DEFAULT_INITSIZE ( 8 * 1024 * 1024) +#define VCI_ALLOCSET_DEFAULT_MAXSIZE (512 * 1024 * 1024) + +/** + * Maximum number of fetch rows specified in vci_CSCreateFetchContext() + */ +#define VCI_NUM_ROWS_READ_AT_ONCE 
(32 * 1024) + +/** + * Maximum number of rows to fetch at one time specified in vci_CSGetSkipFromVirtualTuples() + */ +#define VCI_MAX_FETCHING_ROWS (128) + +/** + * Number of slot to allocate for Skip List + */ +#define VCI_MAX_SKIP_LIST_SLOTS (VCI_MAX_FETCHING_ROWS + 1) + +/** + * Initial number of element in plan_info_map[] + */ +#define VCI_INIT_PLAN_INFO_ENTRIES (16) + +struct ExplainState; +struct VciScanState; +struct VciVPContext; +struct VciVPNode; +struct VciScalarArrayOpExprHashTable; + +/** + * Column store fetch management information per VCI Scan + * + * @note This struct is instantiated on SMC + */ +typedef struct +{ + /** + * The fetch context created by vci_CSCreateFetchContext() in the backend + * is recorded in the fetch_context member of VciScanState as the master. + * However, it is also recorded in this member variable so that it can be referenced from parallel workers. + */ + vci_CSFetchContext fetch_context; + + /** + * Pointer to VCI Scan State for reading VCI index (referenced only when abort) + * Used only on backend side. Reading it on the parallel worker side results in dangling pointer + */ + struct VciScanState *scanstate; + +} vci_fetch_placeholder_t; + +/** + * Column store fetch management information for each VCI index + * + * + * @note This struct instance is taken on SMC. + */ +typedef struct +{ + Oid indexoid; /* OID of VCI index */ + Bitmapset *attr_used; /* Bitmap indicating the column position + * referenced in VCI index */ + int num_fetches; /* Number of VCI Scan that refer to VCI index + * of indexoid */ + + vci_CSQueryContext query_context; /* Column Store Query Context */ + vci_local_ros_t *volatile local_ros; /* Pointer to Local ROS */ + + vci_fetch_placeholder_t *fetch_ph_table; /* Pointer to + * vci_fetch_placeholder_t + * array struct. 
Number of + * element in the array is + * num_fetches */ + +} vci_index_placeholder_t; + +/** + * Data struct that records the correspondence between Plan State and Plan in query + * + * - Plan is on SMC and is common between backend processes and parallel workers. + * - Plan State refers to data in the local memory of the backend process. + * + * @note This struct instance is taken on SMC. + */ +typedef struct +{ + Plan *plan; /* plan (on SMC) */ + PlanState *planstate; /* PlanState on backend side */ + Instrumentation instrument; /* Instrumentation for aggregating + * Instrumentation of parallel workers during + * parallel execution */ +} vci_plan_info_t; + +/** + * Column store fetch management information for each query + */ +typedef struct vci_query_context +{ + /** + * Memory context for allocatin gmemory related to Column Store Fetch + * + * - Expect it to be SMC + * - vci_query_context also be instatiated in mcontext + */ + MemoryContext mcontext; + + /** + * Used to use contention when writing data in the vci_query_context + * struct from a parallel worker. + */ + LWLock *lock; + + /** + * Number of VCI index referenced in query + */ + int num_indexes; + + /** + * Array into placeholder for VCI index referenced in this query + * The size is num_indexes. + */ + vci_index_placeholder_t *index_ph_table; + + /** + * If stops in the middle of custom plan execution + */ + bool has_stopped; + + /** + * planned stmt rewritten into VCI plan + */ + PlannedStmt *plannedstmt; + + /** + * Original planned stmt before rewrite. + * Used when custom plan execution is canceled. + */ + PlannedStmt *origplannedstmt; + + /** + * Maximum number of elements allocated for plan_info_map[] + */ + int max_plan_info_entries; + + /** + * Array containing all Plan and PlanState pairs on backend side + * Accessed by plan_info_map[plan->plan_no - 1] + * + * Used to find PlanState corresponding to plan in + * vci_exec_set_param_plan_as_proxy(). 
+ */ + vci_plan_info_t *plan_info_map; + +} vci_query_context_t; + +/** + * Pointer to column store fetch management object for each query + */ +extern vci_query_context_t *vci_query_context; + +/* ---------------- + * Vector processing + * ---------------- + */ + +/** + * ExprState number in VciVPContext + */ +typedef unsigned int vci_vp_item_id; + +/** + * Templete of function pointer for Vector Processing + */ +typedef void (*VciVPExecOp_func) (Expr *expression, struct VciVPNode *vpnode, struct VciVPContext *vpcontext, ExprContext *econtext, int max_slots); + +/** + * Vector Processing's node + * + * Converted from Expression state node. + */ +typedef struct VciVPNode +{ + VciVPExecOp_func evalfunc; /* Function to process this VP node */ + Expr *expr; + int len_args; /* Max number of elements in arg_items[] */ + vci_vp_item_id *arg_items; /* Item number of the child VP node of this VP + * node */ + + Datum *itemValue; /* Datum array that records this VP node + * process result. Number of element is + * allocated VCI_MAX_FETCHING_ROWS. */ + bool *itemIsNull; /* bool array that records this VP node + * process result. Number of element is + * allocated VCI_MAX_FETCHING_ROWS. 
*/ + uint16 *skip_list; /* Skip list usued during this VP node process */ + + /** Auxiliary information for some VP node types*/ + union + { + /** Original skip list configured on the control VP node */ + struct + { + uint16 *orig_skip_list; + } init; + + /** Used as storage location for intermediate data during processing of VP nodes based on T_CoerceToDomain */ + struct + { + Oid resulttype; + char *name; + } coerce_to_domain; + + struct + { + int paramid; /* numeric ID for parameter */ + Oid paramtype; /* OID of parameter's datatype */ + Plan *vci_parent_plan; + } param; + + struct + { + FmgrInfo *finfo; /* function's lookup data */ + FunctionCallInfo fcinfo_data; /* arguments etc */ + /* faster to access without additional indirection: */ + PGFunction fn_addr; /* actual call address */ + int nargs; /* number of arguments */ + } func; + + struct + { + /* element_type/typlen/typbyval/typalign are filled at runtime */ + Oid element_type; /* InvalidOid if not yet filled */ + bool useOr; /* use OR or AND semantics? 
*/ + int16 typlen; /* array element type storage info */ + bool typbyval; + char typalign; + FmgrInfo *finfo; /* function's lookup data */ + FunctionCallInfo fcinfo_data; /* arguments etc */ + /* faster to access without additional indirection: */ + PGFunction fn_addr; /* actual call address */ + } scalararrayop; + + struct + { + bool has_nulls; + struct VciScalarArrayOpExprHashTable *elements_tab; + FmgrInfo *finfo; /* function's lookup data */ + FunctionCallInfo fcinfo_data; /* arguments etc */ + /* faster to access without additional indirection: */ + PGFunction fn_addr; /* actual call address */ + FmgrInfo *hash_finfo; /* function's lookup data */ + FunctionCallInfo hash_fcinfo_data; /* arguments etc */ + /* faster to access without additional indirection: */ + PGFunction hash_fn_addr; /* actual call address */ + } hashedscalararrayop; + + struct + { + /* lookup and call info for source type's output function */ + FmgrInfo *finfo_out; + /* lookup and call info for result type's input function */ + FmgrInfo *finfo_in; + Oid typioparam; + + /* + * Below ones used in OSS are not required for VCI as these + * information will be filled by InitFunctionCallInfoData in eval + * execute function itself FunctionCallInfo fcinfo_data_out; + * FunctionCallInfo fcinfo_data_in; + */ + } iocoerce; + + } data; +} VciVPNode; + +/** + * Vector processing context + * + * Converted from Expression tree. 
+ */ +typedef struct VciVPContext +{ + vci_vp_item_id num_item; /* Currently assigned maximum item number */ + vci_vp_item_id max_item; /* Maximum number of nodes reserved by VP + * context */ + VciVPNode *itemNode; /* Array of VP node */ + + Datum *resultValue; /* Array of Datum that is the final result + * when VP context is processed */ + bool *resultIsNull; /* Array of bool that is the final result when + * VP context is processed */ + + vci_vp_item_id caseValue; /* Temporarily records caseValue during + * execution of VciExecEvalVectorProcessing() */ + vci_vp_item_id domainValue; /* Temporarily records domainValue during + * execution of VciExecEvalVectorProcessing() */ + +} VciVPContext; + +extern void VciExecEvalVectorProcessing(VciVPContext *vpcontext, ExprContext *econtext, int max_slots); +extern VciVPContext *VciBuildVectorProcessing(Expr *node, PlanState *parent, ExprContext *econtext, uint16 *skip_list); + +/* ---------------- + * Projection information for VCI + * ---------------- + */ + +/** + * Data struct that records how each target in the target list was processed in VciProjectionInfo + */ +typedef struct VciProjectionInfoSlot +{ + bool is_simple_var; + + union + { + /* Record here if is_simple_var is true */ + struct + { + Index relid; /* Copy varno value of Var */ + AttrNumber attno; /* Copy varattno value of Var */ + } simple_var; + + /* Record here if is_simple_var is false */ + struct + { + int expr_id; /* Converted to pi_vp_tle_array[expr_id] + * in VciProjectionInfo */ + } expr; + } data; +} VciProjectionInfoSlot; + +/** + * ProjectionInfo for VCI + * + * The exprlist in ProjectionInfo is an array of VciVPContext pointers for vector processing. 
+ * + * @note The ProjectionInfo type in PostgreSQL and the VciProjectionInfo type in VCI are almost identical, + * but the former loses information about which position in the original target list the simple_var and pi_targetlist were + * created from, while the latter manages this information using pi_slotMap. + */ +typedef struct VciProjectionInfo +{ + /* instructions to evaluate projection */ + ExprState pi_state; + TargetEntry **pi_tle_array; /* Array of expression state tree under + * TargetEntry that was converted */ + VciVPContext **pi_vp_tle_array; /* Array of VP context */ + int pi_tle_array_len; /* Maximum number of element of + * pi_vp_tle_array[] */ + ExprContext *pi_exprContext; /* Execute context for executing this + * VciProjectionInfo */ + TupleTableSlot *pi_slot; /* TupleTableSlot that contains this + * VciProjectionInfo result */ + bool pi_directMap; + int pi_numSimpleVars; /* Number of Simple Vars */ + int *pi_varSlotOffsets; /* Pointer of mapping information used by + * Simple Vars */ + int *pi_varNumbers; /* Pointer of mapping information used by + * Simple Vars */ + int *pi_varOutputCols; /* Pointer of mapping information used by + * Simple Vars */ + VciProjectionInfoSlot *pi_slotMap; /* Map information that records + * whether each target list was + * converted to SimpleVar or VP + * context. */ + int pi_lastInnerVar; + int pi_lastOuterVar; + int pi_lastScanVar; +} VciProjectionInfo; + +/* ---------------- + * VCI Scan/Sort/Agg Common Definitions + * ---------------- + */ + +/* + * Macros specified in flags of CustomScan and CustomScanState + */ +#define VCI_CUSTOMPLAN_MASK (0x00F0) +#define VCI_CUSTOMPLAN_SCAN (0x0010) +#define VCI_CUSTOMPLAN_SORT (0x0020) +#define VCI_CUSTOMPLAN_AGG (0x0030) +#define VCI_CUSTOMPLAN_GATHER (0x0060) + +/** + * VCI based Plan node + */ +typedef struct VciPlan +{ + CustomScan cscan; /* Base class CustomScan */ + + /* + * The following parameters are set by the (sequential) scheduler. 
+ */ + int preset_eflags; /* eflags precalculated for parallel + * scheduling */ + + AttrNumber scan_plan_no; /* Plan Number for VCI Scan that becomes a + * partitioned table */ + + /** Cache of vci_search_scan() result */ + struct VciScan *scan_cached; + + /** Plan to be rewritten. Become NULL when copyObject() is called */ + Plan *orig_plan; +} VciPlan; + +/** + * VCI based Plan State node + */ +typedef struct VciPlanState +{ + CustomScanState css; /* Base class CustomScanState */ + + /** Cache of vci_search_scan_state() result */ + struct VciScanState *scanstate_cached; + +} VciPlanState; + +/** + * VCI Scan node + */ +typedef struct VciScan +{ + VciPlan vci; /* Base class VCI Plan */ + + VciScanMode scan_mode; + + Index scanrelid; /* relid of table to be scanned */ + Oid reloid; /* OID of table to be scanned */ + Oid indexoid; /* OID of VCi index that actually reads data */ + Bitmapset *attr_used; /* Bitmap of column (attribute) to scans */ + int num_attr_used; /* Number of scan column */ + bool is_all_simple_vars; /* Target list is configured with + * SimpleVar */ + double estimate_tuples; /* Estimated number of rows in the scanned + * table */ + bool is_subextent_grain; /* Execute sub-extent fine-grained + * parallelization or not */ + Index index_ph_id; /* index_ph_table[index_ph_id-1] of + * vci_query_context_t */ + Index fetch_ph_id; /* index_ph_table[index_ph_id-1].fetch_ph_table[fetch_ph_id-1] + * of vci_query_context_t */ +} VciScan; + +/** + * VCI Scan State node + */ +typedef struct VciScanState +{ + VciPlanState vci; /* Base class VCI Plan State */ + + bool is_subextent_grain; /* Execute sub-extent fine-grained + * parallelization or not */ + + /* + * Column store fetch setting + */ + vci_CSFetchContext fetch_context; /* Columnar fetch context (master) */ + vci_CSFetchContext local_fetch_context; /* Columnar fetch context (locale + * of each process) */ + vci_extent_status_t *extent_status; /* extent information */ + vci_virtual_tuples_t *vector_set; 
/* vector set */ + + AttrNumber last_attr; /* Biggest Attr Number */ + int *attr_map; /* Map that substracts column store fetch id + * from Attr Number */ + + int32 first_extent_id; /* Extent number that starts reading */ + int32 last_extent_id; /* Extent number that finishes reading + * (exclusive) */ + int64 first_crid; /* CRID that starts read */ + int64 last_crid; /* CRID that finishes read (exclusive) */ + + /* + * The following are read and written during column store fetch execution. + */ + + /** + * true when the first column store fetch is executed + * + * Set to false before executing column store fetch + */ + bool first_fetch; + + VciFetchPos pos; /* Current column store fetch location */ + VciFetchPos mark; /* Column store fetch location recorded in + * mergr */ + + VciVPContext *vp_qual; /* VP context converted from qual */ + + VciProjectionInfo *vps_ProjInfo; /* when generating oputput with non-VP */ + + /* + * The result of vector processing will be recorded in + * result_values[resind][i] and result_isnull[resind][i]. 
With resind is + * order of target list and i is number in vector + */ + Datum **result_values; /** Process result after Vector processing (value information) */ + bool **result_isnull; /** Process result after Vector processing (NULL information) */ + + /** + * Number of Vector processing context + */ + int num_vp_targets; + + /** + * Arrays to pointer to Vector processing context + */ + VciVPContext **vp_targets; + + /*** + * true when parallel worker receives NULL + */ + bool scan_done; + +} VciScanState; + +/** + * VCI Sort node + */ +typedef struct VciSort +{ + VciPlan vci; /* Base class VCI Plan */ + + int numCols; /* number of sort-key columns */ + AttrNumber *sortColIdx; /* their indexes in the target list */ + Oid *sortOperators; /* OIDs of operators to sort them by */ + Oid *collations; /* OIDs of collations */ + bool *nullsFirst; /* NULLS FIRST/LAST directions */ +} VciSort; + +/** + * VCI Sort State node + */ +typedef struct VciSortState +{ + VciPlanState vci; /* Base class VCI Plan State */ + + bool randomAccess; /* need random access to sort output? */ + bool bounded; /* is the result set bounded? */ + int64 bound; /* if bounded, how many tuples are needed */ + bool sort_Done; /* sort completed yet? 
*/ + bool bounded_Done; /* value of bounded we did the sort with */ + int64 bound_Done; /* value of bound we did the sort with */ + void *tuplesortstate; /* private state of tuplesort.c */ + + ScanDirection saved_dir; /* area to store estate->es_direction */ +} VciSortState; + +/** + * VCI Agg node + */ +typedef struct VciAgg +{ + VciPlan vci; /* base class VCI Plan State */ + + AggStrategy aggstrategy; + int numCols; /* number of grouping columns */ + AttrNumber *grpColIdx; /* their indexes in the target list */ + Oid *grpOperators; /* equality operators to compare with */ + Oid *grpCollations; + int64 numGroups; /* estimated number of groups in input */ +} VciAgg; + +typedef struct VciAggStatePerAggData *VciAggStatePerAgg; +typedef struct VciAggStatePerGroupData *VciAggStatePerGroup; + +/** + * VCI Agg State node + */ +typedef struct VciAggState +{ + VciPlanState vci; /* Base class VCI Plan State */ + + bool enable_vp; /* Is vector processing possible or not */ + + VciProjectionInfo *vps_ProjInfo; /* ProjectionInfo when generating Agg + * State output */ + + List *aggs; /* all Aggref nodes in targetlist & quals */ + int numaggs; /* length of list (could be zero!) 
*/ + Oid *eqfuncoids; /* per-grouping-field equality fn oids */ + ExprState **eqfunctions; /* expression returning equality */ + FmgrInfo *hashfunctions; /* per-grouping-field hash fns */ + VciAggStatePerAgg peragg; /* per-Aggref information */ + MemoryContext hash_metacxt; /* memory for hash table bucket array */ + MemoryContext hash_tuplescxt; /* memory for hash table tuples */ + MemoryContext aggcontext; /* memory context for long-lived data */ + ExprContext *tmpcontext; /* econtext for input expressions */ + bool agg_done; /* indicates completion of Agg scan */ + /* these fields are used in AGG_PLAIN and AGG_SORTED modes: */ + VciAggStatePerGroup pergroup; /* per-Aggref-per-group working state */ + HeapTuple grp_firstTuple; /* copy of first tuple of current group */ + /* these fields are used in AGG_HASHED mode: */ + TupleTableSlot *hashslot; /* slot for loading hash table */ + TupleHashTable hashtable; /* hash table with one entry per group */ + int last_hash_column; + int *hash_needed; /* array of columns needed in hash table */ + int num_hash_needed; /* number of columns needed in hash table */ + Datum **hash_input_values; /* array of pointers to datum vector for + * each hash key */ + bool **hash_input_isnull; /* array of pointers to null vector for + * each ehash key */ + bool table_filled; /* hash table filled yet? */ + TupleHashIterator hashiter; /* for iterating through hash table */ + + /* + * aggregation function changes its behaviour by checking AggState + * Therefore, ExecEvalExpr() shows dummy AggState, not VciAggState + */ + AggState *pseudo_aggstate; + + /** + * Record VciAggHashEntry before copying to SMC, in case of parallel worker + * encounter out-of-memory error in SMC. + * Usually set to NULL. + */ + volatile TupleHashEntry saved_entry; + + /** + * Similar to saved_entry, but only records the first HeapTuple of + * each group in plain/sorted aggregation + * Usually set to NULL. 
+ */ + volatile HeapTuple saved_grp_firstTuple; + +} VciAggState; + +typedef void (*VciAdvanceAggref_Func) (VciAggState *, int, VciAggStatePerGroup *, int); + +extern VciAdvanceAggref_Func VciGetSpecialAdvanceAggrefFunc(VciAggStatePerAgg peraggstate); + +/* ---------------- + * VCI Gather information + * ---------------- + */ + +typedef struct VciGather +{ + VciPlan vci; + +} VciGather; + +typedef struct VciGatherState +{ + VciPlanState vci; +} VciGatherState; + +/* ---------------- + * VCI Var State + * ---------------- + */ +/** + * Var expression state for VCI + * + * Normally, Var expression is converted to ExprState exression state in ExecInitNode(), + * but in VCI, additional information is required, so a dedicated class is created. + */ +typedef struct VciVarState +{ + ExprState xprstate; /* Base class VCI Plan State */ + VciScanState *scanstate; /* Pointer to VciScanState from which Var will + * load data */ +} VciVarState; + +/** + * Param expression state for VCI + * + * Normally, Param expression is converted to ExprState exression state in ExecInitNode(), + * but in VCI, additional information is required, so a dedicated class is created. 
+ */ +typedef struct VciParamState +{ + ExprState xprstate; /* Base class VCI Plan State */ + Plan *plan; /* th plan to hold this Param */ + +} VciParamState; + +extern CustomScanMethods vci_scan_scan_methods; +extern CustomExecMethods vci_scan_exec_column_store_methods; +extern CustomScanMethods vci_sort_scan_methods; +extern CustomExecMethods vci_sort_exec_methods; +extern CustomScanMethods vci_agg_scan_methods; +extern CustomExecMethods vci_agg_exec_methods; +extern CustomScanMethods vci_hashagg_scan_methods; +extern CustomExecMethods vci_hashagg_exec_methods; +extern CustomScanMethods vci_groupagg_scan_methods; +extern CustomExecMethods vci_groupagg_exec_methods; +extern CustomScanMethods vci_gather_scan_methods; +extern CustomExecMethods vci_gather_exec_methods; + +/* ---------------- + * vci_executor.c + * ---------------- + */ + +/** + * Enum that specifies how Var is handled in ExecInitNode() + */ +typedef enum vci_initexpr +{ + VCI_INIT_EXPR_NONE, + + /** Var converts to ExprState like original */ + VCI_INIT_EXPR_NORMAL, + + /** Var converts to VciVarState */ + VCI_INIT_EXPR_FETCHING_COLUMN_STORE, + + /** Var converts to VciVarState, but Aggref and later convert to ExpState like original */ + VCI_INIT_EXPR_FETCHING_COLUMN_STORE_AGGREF, +} vci_initexpr_t; + +extern ExprState *VciExecInitQual(List *qual, PlanState *parent, vci_initexpr_t inittype); +extern TupleTableSlot *VciExecProject(VciProjectionInfo *projInfo); + +extern VciProjectionInfo *VciExecBuildProjectionInfo(List *targetList, + ExprContext *econtext, + TupleTableSlot *slot, + PlanState *parent, + TupleDesc inputDesc); + +/* ---------------- + * vci_planner.c + * ---------------- + */ +extern bool vci_is_supported_jointype(JoinType jointype); + +/* ---------------- + * vci_plan.c + * ---------------- + */ + +extern bool vci_is_custom_plan(Plan *plan); +extern int vci_get_vci_plan_type(Plan *plan); +extern void vci_copy_plan(VciPlan *dest, const VciPlan *src); +extern struct VciScan 
*vci_search_scan(VciPlan *); +extern struct VciScanState *vci_search_scan_state(VciPlanState *); +extern List *vci_generate_pass_through_target_list(List *targetlist); + +/* ---------------- + * vci_plan_func.c + * ---------------- + */ + +struct QueryDesc; + +/** + * Callback to notify plan_id before analyzing topmost plan + * (top of main plan tree and each subplan tree) in vci_plannedstmt_tree_walker() + * or vci_plannedstmt_tree_mutator() analyze. + */ +typedef void (*vci_topmost_plan_cb_t) (Plan *, int plan_id, void *context); + +/** + * Template for a function pointer passed as a callback to a mutator routine that rewrites a plan. + */ +typedef bool (*vci_mutator_t) (Plan **plan_p, Plan *parent, void *context, int eflags, bool *changed); + +extern PGDLLEXPORT bool vci_plannedstmt_tree_walker(PlannedStmt *plannedstmt, bool (*walker) (Plan *, void *), vci_topmost_plan_cb_t topmostplan, void *context); +extern PGDLLEXPORT bool vci_plan_tree_walker(Plan *plan, bool (*walker) (Plan *, void *), void *context); +extern bool vci_expression_walker(Plan *plan, bool (*walker) (Node *, void *), void *context); +extern bool vci_expression_and_colid_walker(Plan *plan, bool (*walker) (Node *, void *), void (*attr_cb) (AttrNumber *, void *), void *context); +extern bool vci_expression_and_initplan_walker(Plan *plan, bool (*walker) (Node *, void *), bool (*walker_initplan) (Node *, void *), void *context); + +extern bool vci_plannedstmt_tree_mutator(PlannedStmt *plannedstmt, vci_mutator_t mutator, vci_topmost_plan_cb_t topmostplan, void *context, int eflags, bool *changed); +extern bool vci_plannedstmt_tree_mutator_order(PlannedStmt *plannedstmt, vci_mutator_t mutator, vci_topmost_plan_cb_t topmostplan, void *context, int eflags, bool *changed, int *subplan_order); +extern bool vci_plan_tree_mutator(Plan **plan_p, Plan *parent, vci_mutator_t mutator, void *context, int eflags, bool *changed); + +/* ---------------- + * vci_scan.c + * ---------------- + */ +extern 
TupleTableSlot *VciExecProcScanTuple(VciScanState *node); +extern int VciExecProcScanVector(VciScanState *scanstate); + +/* ---------------- + * vci_sort.c + * ---------------- + */ +struct Tuplesortstate; + +extern struct Tuplesortstate *vci_sort_exec_top_half(VciSortState *sortstate); +extern void vci_sort_perform_sort(VciSortState *sortstate); + +/* ---------------- + * vci_agg.c + * ---------------- + */ + +extern void vci_agg_fill_hash_table(VciAggState *aggstate); +extern TupleTableSlot *vci_agg_retrieve_hash_table(VciAggState *aggstate); +extern TupleHashEntry vci_agg_find_group_from_hash_table(VciAggState *aggstate); +extern void vci_initialize_aggregates(VciAggState *aggstate, + VciAggStatePerAgg peragg, + VciAggStatePerGroup pergroup); +extern void vci_finalize_aggregate(VciAggState *aggstate, VciAggStatePerAgg peraggstate, VciAggStatePerGroup pergroupstate, Datum *resultVal, bool *resultIsNull); +extern void vci_advance_aggregates(VciAggState *aggstate, VciAggStatePerGroup pergroup); + +/* ---------------- + * vci_aggmergetranstype.c + * ---------------- + */ + +/** + * Template for function pointer for copying Datum + */ +typedef Datum (*VciCopyDatumFunc) (Datum, bool, int); + +extern bool vci_is_supported_aggregation(Aggref *aggref); + +/* ---------------- + * vci_gather.c + * ---------------- + */ + +/* ---------------- + * vci_param.c + * ---------------- + */ +extern void VciExecEvalParamExec(ExprState *exprstate, ExprEvalStep *op, ExprContext *econtext); + +/* ---------------- + * Column store fetching (vci_fetch_column_store.c) + * ---------------- + */ +extern void vci_initialize_query_context(QueryDesc *queryDesc, int eflags); +extern void vci_finalize_query_context(void); +extern void vci_free_query_context(void); +extern bool vci_is_processing_custom_plan(void); + +extern void vci_create_one_fetch_context_for_fetching_column_store(VciScanState *scanstate, ExprContext *econtext); +extern void 
vci_clone_one_fetch_context_for_fetching_column_store(VciScanState *scanstate); +extern void vci_destroy_one_fetch_context_for_fetching_column_store(VciScanState *scanstate); + +extern void vci_set_starting_position_for_fetching_column_store(VciScanState *scanstate, int64 crid, int size); + +extern bool vci_fill_vector_set_from_column_store(VciScanState *scanstate); +extern void vci_mark_pos_vector_set_from_column_store(VciScanState *scanstate); +extern void vci_restr_pos_vector_set_from_column_store(VciScanState *scanstate); +extern void vci_step_next_tuple_from_column_store(VciScanState *scanstate); +extern void vci_finish_vector_set_from_column_store(VciScanState *scanstate); + +extern void VciExecTargetListWithVectorProcessing(VciScanState *scanstate, ExprContext *econtext, int max_slots); +extern void VciExecEvalScalarVarFromColumnStore(ExprState *exprstate, ExprEvalStep *op, ExprContext *econtext); + +/* ---------------- + * vci_planner.c + * ---------------- + */ +extern PlannedStmt *vci_generate_custom_plan(PlannedStmt *src, int eflags, Snapshot snapshot); + +#endif /* VCI_EXECUTOR_H */ diff --git a/contrib/vci/include/vci_fetch_row_store.h b/contrib/vci/include/vci_fetch_row_store.h new file mode 100644 index 0000000..841eebd --- /dev/null +++ b/contrib/vci/include/vci_fetch_row_store.h @@ -0,0 +1,22 @@ +/*------------------------------------------------------------------------- + * vci_fetch_row_store.h + * + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/include/vci_fetch_row_store.h + * + *------------------------------------------------------------------------- + */ +#ifndef VCI_FETCH_ROW_STORE_H +#define VCI_FETCH_ROW_STORE_H + +#include "access/heapam.h" + +struct VciScanState; + +extern void VciExecAssignScanProjectionInfo(struct VciScanState *node); +extern HeapTuple vci_heap_getnext(struct VciScanState *scanstate, HeapScanDesc scan, ScanDirection direction); + +#endif /* 
VCI_FETCH_ROW_STORE_H */ diff --git a/contrib/vci/include/vci_planner.h b/contrib/vci/include/vci_planner.h new file mode 100644 index 0000000..bf2b4c4 --- /dev/null +++ b/contrib/vci/include/vci_planner.h @@ -0,0 +1,151 @@ +/*------------------------------------------------------------------------- + * + * vci_planner.h + * Data struct definitions needed for analysis to rewrite plans + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/include/vci_planner.h + * + *------------------------------------------------------------------------- + */ +#ifndef VCI_PLANNER_H +#define VCI_PLANNER_H + +#include "access/attnum.h" +#include "nodes/execnodes.h" +#include "nodes/plannodes.h" + +/** + * Types for internal use only by planners + * + * NestLoop and HashJoin do not actually replace VCI Plan. + * So only record the possibility of including NestLoop and HashJoin + * in the parallel plan group + */ +typedef enum +{ + VCI_INNER_PLAN_TYPE_NONE = 0, + VCI_INNER_PLAN_TYPE_SCAN, + VCI_INNER_PLAN_TYPE_SORT, + VCI_INNER_PLAN_TYPE_AGG, + VCI_INNER_PLAN_TYPE_HASHJOIN, + VCI_INNER_PLAN_TYPE_NESTLOOP, + VCI_INNER_PLAN_TYPE_REDIST, +} vci_inner_plan_type_t; + +/** + * Whether plan node is suitable for VCi execution + */ +typedef enum +{ + VCI_PLAN_COMPAT_OK = 0, + VCI_PLAN_COMPAT_FORBID_TYPE, /* VCI execution prohibited type */ + VCI_PLAN_COMPAT_UNSUPPORTED_OBJ, +} vci_plan_compat_t; + +typedef struct +{ + /** + * VCI Plan Type + */ + vci_inner_plan_type_t plan_type; + + AttrNumber scan_plan_no; + + int preset_eflags; + + Bitmapset *def_param_ids; + Bitmapset *use_param_ids; + + vci_plan_compat_t plan_compat; + +} vci_plan_attr_t; + +typedef enum +{ + VCI_PARAM_EXEC_UNKNOWN = 0, + + VCI_PARAM_EXEC_NESTLOOP, + + VCI_PARAM_EXEC_INITPLAN, + + VCI_PARAM_EXEC_SUBPLAN, +} vci_param_exec_type_t; + +typedef struct +{ + vci_param_exec_type_t type; + Bitmapset *def_plan_nos; + int num_def_plans; + Bitmapset *use_plan_nos; + int 
num_use_plans; + int plan_id; +} vci_param_exec_attr_t; + +typedef enum +{ + VCI_SUBPLAN_UNKNOWN = 0, + VCI_SUBPLAN_INITPLAN, + VCI_SUBPLAN_SUBPLAN, +} vci_subplan_type_t; + +typedef struct +{ + Plan *topmostplan; /** Topmost Plan */ + vci_subplan_type_t type; + Bitmapset *plan_ids; + + bool has_analyzed_parallel; +} vci_subplan_attr_t; + +typedef struct +{ + PlannedStmt *plannedstmt; + + EState *estate; + + vci_subplan_attr_t *subplan_attr_map; + + int max_subplan_attrs; + + int *subplan_order_array; + + vci_plan_attr_t *plan_attr_map; + + int max_plan_attrs; + + AttrNumber last_plan_no; + + vci_param_exec_attr_t *param_exec_attr_map; + + int current_plan_id; + + AttrNumber current_plan_no; + + bool forbid_parallel_exec; + + bool suppress_vp; + + struct + { + List *main_plan_list; + + Bitmapset *plan_group; + + Bitmapset *correlated_subplans; + + Bitmapset *local_param_ids; + } parallel; + +} vci_rewrite_plan_context_t; + +extern bool vci_preanalyze_plan_tree(PlannedStmt *target, vci_rewrite_plan_context_t *rp_context, int eflags, bool *isGather); +extern void vci_register_plan_id(Plan *plan, int plan_id, void *context); +extern void vci_expand_plan_attr_map(vci_rewrite_plan_context_t *rp_context); +extern vci_inner_plan_type_t vci_get_inner_plan_type(vci_rewrite_plan_context_t *context, const Plan *plan); +extern AttrNumber vci_get_inner_scan_plan_no(vci_rewrite_plan_context_t *context, const Plan *plan); +extern void vci_set_inner_plan_type_and_scan_plan_no(vci_rewrite_plan_context_t *context, Plan *plan, vci_inner_plan_type_t plan_type, AttrNumber scan_plan_no); + +#endif /* VCI_PLANNER_H */ -- 1.8.3.1