David Rowley <dgrowleyml@gmail.com> writes:
> On Wed, 18 Dec 2024 at 14:02, Tom Lane <tgl@sss.pgh.pa.us> wrote:
>> it appears your patch did not fully adjust BuildTupleHashTableExt
>> for variable input-slot type. You need the attached as well.
> Do you have a test case in master that triggers a problem here?
No, that's what I didn't want to spend time looking for ;-).
But here is a WIP modification of my 0001 patch from the SetOp
improvement thread -- the difference is to lobotomize
setop_load_hash_tuple to just return the input slot. Without
the execGrouping.c changes, it gets assertion failures in the core
regression tests. (My intention had been to remove
setop_load_hash_tuple and then adjust build_hash_table to
pass a non-null inputOps if the two inputs have the same tuple
slot type. But I got stuck on this step.)
regards, tom lane
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 4a8f72305c..7491e53c03 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -236,9 +236,9 @@ BuildTupleHashTableExt(PlanState *parent,
hash_iv);
/* build comparator for all columns */
- /* XXX: should we support non-minimal tuples for the inputslot? */
hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
- &TTSOpsMinimalTuple, &TTSOpsMinimalTuple,
+ inputOps,
+ &TTSOpsMinimalTuple,
numCols,
keyColIdx, eqfuncoids, collations,
allow_jit ? parent : NULL);
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
index b40d81f3ff..159db5710f 100644
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -3,29 +3,30 @@
* nodeSetOp.c
* Routines to handle INTERSECT and EXCEPT selection
*
- * The input of a SetOp node consists of tuples from two relations,
- * which have been combined into one dataset, with a junk attribute added
- * that shows which relation each tuple came from. In SETOP_SORTED mode,
- * the input has furthermore been sorted according to all the grouping
- * columns (ie, all the non-junk attributes). The SetOp node scans each
- * group of identical tuples to determine how many came from each input
- * relation. Then it is a simple matter to emit the output demanded by the
- * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
+ * The input of a SetOp node consists of two relations (outer and inner)
+ * with identical column sets. In EXCEPT queries the outer relation is
+ * always the left side, while in INTERSECT cases the planner tries to
+ * make the outer relation be the smaller of the two inputs.
*
- * In SETOP_HASHED mode, the input is delivered in no particular order,
- * except that we know all the tuples from one input relation will come before
- * all the tuples of the other. The planner guarantees that the first input
- * relation is the left-hand one for EXCEPT, and tries to make the smaller
- * input relation come first for INTERSECT. We build a hash table in memory
- * with one entry for each group of identical tuples, and count the number of
- * tuples in the group from each relation. After seeing all the input, we
- * scan the hashtable and generate the correct output using those counts.
- * We can avoid making hashtable entries for any tuples appearing only in the
- * second input relation, since they cannot result in any output.
+ * In SETOP_SORTED mode, each input has been sorted according to all the
+ * grouping columns (ie, all the non-junk attributes). The SetOp node
+ * essentially performs a merge join on the grouping columns, except that
+ * it is only interested in counting how many tuples from each input match.
+ * Then it is a simple matter to emit the output demanded by the SQL spec
+ * for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
+ *
+ * In SETOP_HASHED mode, the inputs are delivered in no particular order.
+ * We read the outer relation and build a hash table in memory with one entry
+ * for each group of identical tuples, counting the number of tuples in the
+ * group. Then we read the inner relation and count the number of tuples
+ * matching each outer group. (We can disregard any tuples appearing only
+ * in the inner relation, since they cannot result in any output.) After
+ * seeing all the input, we scan the hashtable and generate the correct
+ * output using those counts.
*
* This node type is not used for UNION or UNION ALL, since those can be
- * implemented more cheaply (there's no need for the junk attribute to
- * identify the source relation).
+ * implemented more cheaply (there's no need to count the number of
+ * matching tuples).
*
* Note that SetOp does no qual checking nor projection. The delivered
* output tuples are just copies of the first-to-arrive tuple in each
@@ -54,65 +55,30 @@
/*
* SetOpStatePerGroupData - per-group working state
*
- * These values are working state that is initialized at the start of
- * an input tuple group and updated for each input tuple.
- *
- * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
- * the plan state node. In SETOP_HASHED mode, the hash table contains one
- * of these for each tuple group.
+ * In SETOP_SORTED mode, we need only one of these structs, and it's just a
+ * local in setop_retrieve_sorted. In SETOP_HASHED mode, the hash table
+ * contains one of these for each tuple group.
*/
typedef struct SetOpStatePerGroupData
{
- long numLeft; /* number of left-input dups in group */
- long numRight; /* number of right-input dups in group */
-} SetOpStatePerGroupData;
+ int64 numLeft; /* number of left-input dups in group */
+ int64 numRight; /* number of right-input dups in group */
+} SetOpStatePerGroupData;
+
+typedef SetOpStatePerGroupData *SetOpStatePerGroup;
-static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
+static TupleTableSlot *setop_retrieve_sorted(SetOpState *setopstate);
+static void setop_load_group(SetOpStatePerInput *input, PlanState *inputPlan,
+ SetOpState *setopstate);
+static int setop_compare_slots(TupleTableSlot *s1, TupleTableSlot *s2,
+ SetOpState *setopstate);
+static TupleTableSlot *setop_load_hash_tuple(SetOpState *setopstate,
+ TupleTableSlot *inputslot);
static void setop_fill_hash_table(SetOpState *setopstate);
static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);
-/*
- * Initialize state for a new group of input values.
- */
-static inline void
-initialize_counts(SetOpStatePerGroup pergroup)
-{
- pergroup->numLeft = pergroup->numRight = 0;
-}
-
-/*
- * Advance the appropriate counter for one input tuple.
- */
-static inline void
-advance_counts(SetOpStatePerGroup pergroup, int flag)
-{
- if (flag)
- pergroup->numRight++;
- else
- pergroup->numLeft++;
-}
-
-/*
- * Fetch the "flag" column from an input tuple.
- * This is an integer column with value 0 for left side, 1 for right side.
- */
-static int
-fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
-{
- SetOp *node = (SetOp *) setopstate->ps.plan;
- int flag;
- bool isNull;
-
- flag = DatumGetInt32(slot_getattr(inputslot,
- node->flagColIdx,
- &isNull));
- Assert(!isNull);
- Assert(flag == 0 || flag == 1);
- return flag;
-}
-
/*
* Initialize the hash table to empty.
*/
@@ -130,10 +96,10 @@ build_hash_table(SetOpState *setopstate)
desc,
NULL,
node->numCols,
- node->dupColIdx,
+ node->cmpColIdx,
setopstate->eqfuncoids,
setopstate->hashfunctions,
- node->dupCollations,
+ node->cmpCollations,
node->numGroups,
0,
setopstate->ps.state->es_query_cxt,
@@ -218,108 +184,126 @@ ExecSetOp(PlanState *pstate)
return setop_retrieve_hash_table(node);
}
else
- return setop_retrieve_direct(node);
+ return setop_retrieve_sorted(node);
}
/*
* ExecSetOp for non-hashed case
*/
static TupleTableSlot *
-setop_retrieve_direct(SetOpState *setopstate)
+setop_retrieve_sorted(SetOpState *setopstate)
{
PlanState *outerPlan;
- SetOpStatePerGroup pergroup;
- TupleTableSlot *outerslot;
+ PlanState *innerPlan;
TupleTableSlot *resultTupleSlot;
- ExprContext *econtext = setopstate->ps.ps_ExprContext;
/*
* get state info from node
*/
outerPlan = outerPlanState(setopstate);
- pergroup = (SetOpStatePerGroup) setopstate->pergroup;
+ innerPlan = innerPlanState(setopstate);
resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
/*
- * We loop retrieving groups until we find one we should return
+ * If first time through, establish the invariant that setop_load_group
+ * expects: each side's nextTupleSlot is the next output from the child
+ * plan, or empty if there is no more output from it.
*/
- while (!setopstate->setop_done)
+ if (setopstate->need_init)
{
+ setopstate->need_init = false;
+
+ setopstate->leftInput.nextTupleSlot = ExecProcNode(outerPlan);
+
/*
- * If we don't already have the first tuple of the new group, fetch it
- * from the outer plan.
+ * If the outer relation is empty, then we will emit nothing, and we
+ * don't need to read the inner relation at all.
*/
- if (setopstate->grp_firstTuple == NULL)
+ if (TupIsNull(setopstate->leftInput.nextTupleSlot))
{
- outerslot = ExecProcNode(outerPlan);
- if (!TupIsNull(outerslot))
- {
- /* Make a copy of the first input tuple */
- setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
- }
- else
- {
- /* outer plan produced no tuples at all */
- setopstate->setop_done = true;
- return NULL;
- }
+ setopstate->setop_done = true;
+ return NULL;
}
+ setopstate->rightInput.nextTupleSlot = ExecProcNode(innerPlan);
+
+ /* Set flags that we've not completed either side's group */
+ setopstate->leftInput.needGroup = true;
+ setopstate->rightInput.needGroup = true;
+ }
+
+ /*
+ * We loop retrieving groups until we find one we should return
+ */
+ while (!setopstate->setop_done)
+ {
+ int cmpresult;
+ SetOpStatePerGroupData pergroup;
+
/*
- * Store the copied first input tuple in the tuple table slot reserved
- * for it. The tuple will be deleted when it is cleared from the
- * slot.
+ * Fetch the rest of the current outer group, if we didn't already.
*/
- ExecStoreHeapTuple(setopstate->grp_firstTuple,
- resultTupleSlot,
- true);
- setopstate->grp_firstTuple = NULL; /* don't keep two pointers */
+ if (setopstate->leftInput.needGroup)
+ setop_load_group(&setopstate->leftInput, outerPlan, setopstate);
- /* Initialize working state for a new input tuple group */
- initialize_counts(pergroup);
+ /*
+ * If no more outer groups, we're done, and don't need to look at any
+ * more of the inner relation.
+ */
+ if (setopstate->leftInput.numTuples == 0)
+ {
+ setopstate->setop_done = true;
+ break;
+ }
- /* Count the first input tuple */
- advance_counts(pergroup,
- fetch_tuple_flag(setopstate, resultTupleSlot));
+ /*
+ * Fetch the rest of the current inner group, if we didn't already.
+ */
+ if (setopstate->rightInput.needGroup)
+ setop_load_group(&setopstate->rightInput, innerPlan, setopstate);
/*
- * Scan the outer plan until we exhaust it or cross a group boundary.
+ * Determine whether we have matching groups on both sides (this is
+ * basically like the core logic of a merge join).
*/
- for (;;)
- {
- outerslot = ExecProcNode(outerPlan);
- if (TupIsNull(outerslot))
- {
- /* no more outer-plan tuples available */
- setopstate->setop_done = true;
- break;
- }
-
- /*
- * Check whether we've crossed a group boundary.
- */
- econtext->ecxt_outertuple = resultTupleSlot;
- econtext->ecxt_innertuple = outerslot;
-
- if (!ExecQualAndReset(setopstate->eqfunction, econtext))
- {
- /*
- * Save the first input tuple of the next group.
- */
- setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
- break;
- }
+ if (setopstate->rightInput.numTuples == 0)
+ cmpresult = -1; /* as though left input is lesser */
+ else
+ cmpresult = setop_compare_slots(setopstate->leftInput.firstTupleSlot,
+ setopstate->rightInput.firstTupleSlot,
+ setopstate);
- /* Still in same group, so count this tuple */
- advance_counts(pergroup,
- fetch_tuple_flag(setopstate, outerslot));
+ if (cmpresult < 0)
+ {
+ /* Left group is first, has no right matches */
+ pergroup.numLeft = setopstate->leftInput.numTuples;
+ pergroup.numRight = 0;
+ /* We'll need another left group next time */
+ setopstate->leftInput.needGroup = true;
+ }
+ else if (cmpresult == 0)
+ {
+ /* We have matching groups */
+ pergroup.numLeft = setopstate->leftInput.numTuples;
+ pergroup.numRight = setopstate->rightInput.numTuples;
+ /* We'll need to read from both sides next time */
+ setopstate->leftInput.needGroup = true;
+ setopstate->rightInput.needGroup = true;
+ }
+ else
+ {
+ /* Right group has no left matches, so we can ignore it */
+ setopstate->rightInput.needGroup = true;
+ continue;
}
/*
- * Done scanning input tuple group. See if we should emit any copies
- * of result tuple, and if so return the first copy.
+ * Done scanning these input tuple groups. See if we should emit any
+ * copies of result tuple, and if so return the first copy. (Note
+ * that the result tuple is the same as the left input's firstTuple
+ * slot.)
*/
- set_output_count(setopstate, pergroup);
+ set_output_count(setopstate, &pergroup);
if (setopstate->numOutput > 0)
{
@@ -334,84 +318,199 @@ setop_retrieve_direct(SetOpState *setopstate)
}
/*
- * ExecSetOp for hashed case: phase 1, read input and build hash table
+ * Load next group of tuples from one child plan or the other.
+ *
+ * On entry, we've already read the first tuple of the next group
+ * (if there is one) into input->nextTupleSlot. This invariant
+ * is maintained on exit.
+ */
+static void
+setop_load_group(SetOpStatePerInput *input, PlanState *inputPlan,
+ SetOpState *setopstate)
+{
+ input->needGroup = false;
+
+ /* If we've exhausted this child plan, report an empty group */
+ if (TupIsNull(input->nextTupleSlot))
+ {
+ ExecClearTuple(input->firstTupleSlot);
+ input->numTuples = 0;
+ return;
+ }
+
+ /* Make a local copy of the first tuple for comparisons */
+ ExecStoreMinimalTuple(ExecCopySlotMinimalTuple(input->nextTupleSlot),
+ input->firstTupleSlot,
+ true);
+ /* and count it */
+ input->numTuples = 1;
+
+ /* Scan till we find the end-of-group */
+ for (;;)
+ {
+ int cmpresult;
+
+ /* Get next input tuple, if there is one */
+ input->nextTupleSlot = ExecProcNode(inputPlan);
+ if (TupIsNull(input->nextTupleSlot))
+ break;
+
+ /* There is; does it belong to same group as firstTuple? */
+ cmpresult = setop_compare_slots(input->firstTupleSlot,
+ input->nextTupleSlot,
+ setopstate);
+ Assert(cmpresult <= 0); /* else input is mis-sorted */
+ if (cmpresult != 0)
+ break;
+
+ /* Still in same group, so count this tuple */
+ input->numTuples++;
+ }
+}
+
+/*
+ * Compare the tuples in the two given slots.
+ */
+static int
+setop_compare_slots(TupleTableSlot *s1, TupleTableSlot *s2,
+ SetOpState *setopstate)
+{
+ /* We'll often need to fetch all the columns, so just do it */
+ slot_getallattrs(s1);
+ slot_getallattrs(s2);
+ for (int nkey = 0; nkey < setopstate->numCols; nkey++)
+ {
+ SortSupport sortKey = setopstate->sortKeys + nkey;
+ AttrNumber attno = sortKey->ssup_attno;
+ Datum datum1 = s1->tts_values[attno - 1],
+ datum2 = s2->tts_values[attno - 1];
+ bool isNull1 = s1->tts_isnull[attno - 1],
+ isNull2 = s2->tts_isnull[attno - 1];
+ int compare;
+
+ compare = ApplySortComparator(datum1, isNull1,
+ datum2, isNull2,
+ sortKey);
+ if (compare != 0)
+ return compare;
+ }
+ return 0;
+}
+
+/*
+ * Prepare an input tuple for hashtable lookup.
+ *
+ * We could use the input slot directly except that LookupTupleHashEntry
+ * only accepts MinimalTuple slots, which is typically not what our child
+ * plans will return. We abuse the SetOp node's ResultTupleSlot as a
+ * temporary slot for this purpose.
+ */
+static TupleTableSlot *
+setop_load_hash_tuple(SetOpState *setopstate, TupleTableSlot *inputslot)
+{
+#if 1
+ return inputslot;
+#else
+ TupleTableSlot *resultslot = setopstate->ps.ps_ResultTupleSlot;
+ int natts = resultslot->tts_tupleDescriptor->natts;
+
+ slot_getallattrs(inputslot);
+ ExecClearTuple(resultslot);
+ for (int i = 0; i < natts; i++)
+ {
+ resultslot->tts_values[i] = inputslot->tts_values[i];
+ resultslot->tts_isnull[i] = inputslot->tts_isnull[i];
+ }
+ ExecStoreVirtualTuple(resultslot);
+ return resultslot;
+#endif
+}
+
+/*
+ * ExecSetOp for hashed case: phase 1, read inputs and build hash table
*/
static void
setop_fill_hash_table(SetOpState *setopstate)
{
- SetOp *node = (SetOp *) setopstate->ps.plan;
PlanState *outerPlan;
- int firstFlag;
- bool in_first_rel PG_USED_FOR_ASSERTS_ONLY;
+ PlanState *innerPlan;
ExprContext *econtext = setopstate->ps.ps_ExprContext;
+ bool have_tuples = false;
/*
* get state info from node
*/
outerPlan = outerPlanState(setopstate);
- firstFlag = node->firstFlag;
- /* verify planner didn't mess up */
- Assert(firstFlag == 0 ||
- (firstFlag == 1 &&
- (node->cmd == SETOPCMD_INTERSECT ||
- node->cmd == SETOPCMD_INTERSECT_ALL)));
+ innerPlan = innerPlanState(setopstate);
/*
* Process each outer-plan tuple, and then fetch the next one, until we
* exhaust the outer plan.
*/
- in_first_rel = true;
for (;;)
{
TupleTableSlot *outerslot;
- int flag;
TupleHashEntryData *entry;
bool isnew;
outerslot = ExecProcNode(outerPlan);
if (TupIsNull(outerslot))
break;
+ have_tuples = true;
- /* Identify whether it's left or right input */
- flag = fetch_tuple_flag(setopstate, outerslot);
+ /* Find or build hashtable entry for this tuple's group */
+ entry = LookupTupleHashEntry(setopstate->hashtable,
+ setop_load_hash_tuple(setopstate,
+ outerslot),
+ &isnew, NULL);
- if (flag == firstFlag)
+ /* If new tuple group, initialize counts to zero */
+ if (isnew)
{
- /* (still) in first input relation */
- Assert(in_first_rel);
-
- /* Find or build hashtable entry for this tuple's group */
- entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
- &isnew, NULL);
-
- /* If new tuple group, initialize counts */
- if (isnew)
- {
- entry->additional = (SetOpStatePerGroup)
- MemoryContextAlloc(setopstate->hashtable->tablecxt,
+ entry->additional = (SetOpStatePerGroup)
+ MemoryContextAllocZero(setopstate->hashtable->tablecxt,
sizeof(SetOpStatePerGroupData));
- initialize_counts((SetOpStatePerGroup) entry->additional);
- }
-
- /* Advance the counts */
- advance_counts((SetOpStatePerGroup) entry->additional, flag);
}
- else
+
+ /* Advance the counts */
+ ((SetOpStatePerGroup) entry->additional)->numLeft++;
+
+ /* Must reset expression context after each hashtable lookup */
+ ResetExprContext(econtext);
+ }
+
+ /*
+ * If the outer relation is empty, then we will emit nothing, and we don't
+ * need to read the inner relation at all.
+ */
+ if (have_tuples)
+ {
+ /*
+ * Process each inner-plan tuple, and then fetch the next one, until
+ * we exhaust the inner plan.
+ */
+ for (;;)
{
- /* reached second relation */
- in_first_rel = false;
+ TupleTableSlot *innerslot;
+ TupleHashEntryData *entry;
+
+ innerslot = ExecProcNode(innerPlan);
+ if (TupIsNull(innerslot))
+ break;
/* For tuples not seen previously, do not make hashtable entry */
- entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
+ entry = LookupTupleHashEntry(setopstate->hashtable,
+ setop_load_hash_tuple(setopstate,
+ innerslot),
NULL, NULL);
/* Advance the counts if entry is already present */
if (entry)
- advance_counts((SetOpStatePerGroup) entry->additional, flag);
- }
+ ((SetOpStatePerGroup) entry->additional)->numRight++;
- /* Must reset expression context after each hashtable lookup */
- ResetExprContext(econtext);
+ /* Must reset expression context after each hashtable lookup */
+ ResetExprContext(econtext);
+ }
}
setopstate->table_filled = true;
@@ -482,7 +581,6 @@ SetOpState *
ExecInitSetOp(SetOp *node, EState *estate, int eflags)
{
SetOpState *setopstate;
- TupleDesc outerDesc;
/* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
@@ -495,14 +593,10 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
setopstate->ps.state = estate;
setopstate->ps.ExecProcNode = ExecSetOp;
- setopstate->eqfuncoids = NULL;
- setopstate->hashfunctions = NULL;
setopstate->setop_done = false;
setopstate->numOutput = 0;
- setopstate->pergroup = NULL;
- setopstate->grp_firstTuple = NULL;
- setopstate->hashtable = NULL;
- setopstate->tableContext = NULL;
+ setopstate->numCols = node->numCols;
+ setopstate->need_init = true;
/*
* create expression context
@@ -523,52 +617,72 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
/*
* initialize child nodes
*
- * If we are hashing then the child plan does not need to handle REWIND
+ * If we are hashing then the child plans do not need to handle REWIND
* efficiently; see ExecReScanSetOp.
*/
if (node->strategy == SETOP_HASHED)
eflags &= ~EXEC_FLAG_REWIND;
outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);
- outerDesc = ExecGetResultType(outerPlanState(setopstate));
+ innerPlanState(setopstate) = ExecInitNode(innerPlan(node), estate, eflags);
/*
- * Initialize result slot and type. Setop nodes do no projections, so
- * initialize projection info for this node appropriately.
+ * Initialize locally-allocated slots. In hashed mode, we just need a
+ * result slot. In sorted mode, we need one first-tuple-of-group slot for
+ * each input; we use the result slot for the left input's slot and create
+ * another for the right input. (Note: the nextTupleSlot slots are not
+ * ours, but just point to the last slot returned by the input plan node.)
*/
- ExecInitResultTupleSlotTL(&setopstate->ps,
- node->strategy == SETOP_HASHED ?
- &TTSOpsMinimalTuple : &TTSOpsHeapTuple);
+ ExecInitResultTupleSlotTL(&setopstate->ps, &TTSOpsMinimalTuple);
+ if (node->strategy != SETOP_HASHED)
+ {
+ setopstate->leftInput.firstTupleSlot =
+ setopstate->ps.ps_ResultTupleSlot;
+ setopstate->rightInput.firstTupleSlot =
+ ExecInitExtraTupleSlot(estate,
+ setopstate->ps.ps_ResultTupleDesc,
+ &TTSOpsMinimalTuple);
+ }
+
+ /* Setop nodes do no projections. */
setopstate->ps.ps_ProjInfo = NULL;
/*
- * Precompute fmgr lookup data for inner loop. We need both equality and
- * hashing functions to do it by hashing, but only equality if not
- * hashing.
+ * Precompute fmgr lookup data for inner loop. We need equality and
+ * hashing functions to do it by hashing, while for sorting we need
+ * SortSupport data.
*/
if (node->strategy == SETOP_HASHED)
execTuplesHashPrepare(node->numCols,
- node->dupOperators,
+ node->cmpOperators,
&setopstate->eqfuncoids,
&setopstate->hashfunctions);
else
- setopstate->eqfunction =
- execTuplesMatchPrepare(outerDesc,
- node->numCols,
- node->dupColIdx,
- node->dupOperators,
- node->dupCollations,
- &setopstate->ps);
+ {
+ int nkeys = node->numCols;
+
+ setopstate->sortKeys = (SortSupport)
+ palloc0(nkeys * sizeof(SortSupportData));
+ for (int i = 0; i < nkeys; i++)
+ {
+ SortSupport sortKey = setopstate->sortKeys + i;
+ sortKey->ssup_cxt = CurrentMemoryContext;
+ sortKey->ssup_collation = node->cmpCollations[i];
+ sortKey->ssup_nulls_first = node->cmpNullsFirst[i];
+ sortKey->ssup_attno = node->cmpColIdx[i];
+ /* abbreviated key conversion is not useful here */
+ sortKey->abbreviate = false;
+
+ PrepareSortSupportFromOrderingOp(node->cmpOperators[i], sortKey);
+ }
+ }
+
+ /* Create a hash table if needed */
if (node->strategy == SETOP_HASHED)
{
build_hash_table(setopstate);
setopstate->table_filled = false;
}
- else
- {
- setopstate->pergroup =
- (SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
- }
return setopstate;
}
@@ -576,7 +690,7 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
/* ----------------------------------------------------------------
* ExecEndSetOp
*
- * This shuts down the subplan and frees resources allocated
+ * This shuts down the subplans and frees resources allocated
* to this node.
* ----------------------------------------------------------------
*/
@@ -588,6 +702,7 @@ ExecEndSetOp(SetOpState *node)
MemoryContextDelete(node->tableContext);
ExecEndNode(outerPlanState(node));
+ ExecEndNode(innerPlanState(node));
}
@@ -595,6 +710,7 @@ void
ExecReScanSetOp(SetOpState *node)
{
PlanState *outerPlan = outerPlanState(node);
+ PlanState *innerPlan = innerPlanState(node);
ExecClearTuple(node->ps.ps_ResultTupleSlot);
node->setop_done = false;
@@ -612,34 +728,29 @@ ExecReScanSetOp(SetOpState *node)
return;
/*
- * If we do have the hash table and the subplan does not have any
+ * If we do have the hash table and the subplans do not have any
* parameter changes, then we can just rescan the existing hash table;
* no need to build it again.
*/
- if (outerPlan->chgParam == NULL)
+ if (outerPlan->chgParam == NULL && innerPlan->chgParam == NULL)
{
ResetTupleHashIterator(node->hashtable, &node->hashiter);
return;
}
- }
- /* Release first tuple of group, if we have made a copy */
- if (node->grp_firstTuple != NULL)
- {
- heap_freetuple(node->grp_firstTuple);
- node->grp_firstTuple = NULL;
- }
-
- /* Release any hashtable storage */
- if (node->tableContext)
- MemoryContextReset(node->tableContext);
+ /* Release any hashtable storage */
+ if (node->tableContext)
+ MemoryContextReset(node->tableContext);
- /* And rebuild empty hashtable if needed */
- if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
- {
+ /* And rebuild an empty hashtable */
ResetTupleHashTable(node->hashtable);
node->table_filled = false;
}
+ else
+ {
+ /* Need to re-read first input from each side */
+ node->need_init = true;
+ }
/*
* if chgParam of subnode is not null then plan will be re-scanned by
@@ -647,4 +758,6 @@ ExecReScanSetOp(SetOpState *node)
*/
if (outerPlan->chgParam == NULL)
ExecReScan(outerPlan);
+ if (innerPlan->chgParam == NULL)
+ ExecReScan(innerPlan);
}
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README
index 2ab4f3dbf3..f341d9f303 100644
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -649,7 +649,7 @@ RelOptInfo - a relation or joined relations
GroupingSetsPath - an Agg plan node used to implement GROUPING SETS
MinMaxAggPath - a Result plan node with subplans performing MIN/MAX
WindowAggPath - a WindowAgg plan node applied to some sub-path
- SetOpPath - a SetOp plan node applied to some sub-path
+ SetOpPath - a SetOp plan node applied to two sub-paths
RecursiveUnionPath - a RecursiveUnion plan node applied to two sub-paths
LockRowsPath - a LockRows plan node applied to some sub-path
ModifyTablePath - a ModifyTable plan node applied to some sub-path(s)
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 178c572b02..b3e2294e84 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -301,9 +301,9 @@ static Unique *make_unique_from_pathkeys(Plan *lefttree,
List *pathkeys, int numCols);
static Gather *make_gather(List *qptlist, List *qpqual,
int nworkers, int rescan_param, bool single_copy, Plan *subplan);
-static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
- List *distinctList, AttrNumber flagColIdx, int firstFlag,
- long numGroups);
+static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy,
+ List *tlist, Plan *lefttree, Plan *righttree,
+ List *groupList, long numGroups);
static LockRows *make_lockrows(Plan *lefttree, List *rowMarks, int epqParam);
static Result *make_result(List *tlist, Node *resconstantqual, Plan *subplan);
static ProjectSet *make_project_set(List *tlist, Plan *subplan);
@@ -2719,25 +2719,29 @@ static SetOp *
create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags)
{
SetOp *plan;
- Plan *subplan;
+ List *tlist = build_path_tlist(root, &best_path->path);
+ Plan *leftplan;
+ Plan *rightplan;
long numGroups;
/*
* SetOp doesn't project, so tlist requirements pass through; moreover we
* need grouping columns to be labeled.
*/
- subplan = create_plan_recurse(root, best_path->subpath,
- flags | CP_LABEL_TLIST);
+ leftplan = create_plan_recurse(root, best_path->leftpath,
+ flags | CP_LABEL_TLIST);
+ rightplan = create_plan_recurse(root, best_path->rightpath,
+ flags | CP_LABEL_TLIST);
/* Convert numGroups to long int --- but 'ware overflow! */
numGroups = clamp_cardinality_to_long(best_path->numGroups);
plan = make_setop(best_path->cmd,
best_path->strategy,
- subplan,
- best_path->distinctList,
- best_path->flagColIdx,
- best_path->firstFlag,
+ tlist,
+ leftplan,
+ rightplan,
+ best_path->groupList,
numGroups);
copy_generic_path_info(&plan->plan, (Path *) best_path);
@@ -6950,57 +6954,62 @@ make_gather(List *qptlist,
}
/*
- * distinctList is a list of SortGroupClauses, identifying the targetlist
- * items that should be considered by the SetOp filter. The input path must
- * already be sorted accordingly.
+ * groupList is a list of SortGroupClauses, identifying the targetlist
+ * items that should be considered by the SetOp filter. The input plans must
+ * already be sorted accordingly, if we're doing SETOP_SORTED mode.
*/
static SetOp *
-make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
- List *distinctList, AttrNumber flagColIdx, int firstFlag,
- long numGroups)
+make_setop(SetOpCmd cmd, SetOpStrategy strategy,
+ List *tlist, Plan *lefttree, Plan *righttree,
+ List *groupList, long numGroups)
{
SetOp *node = makeNode(SetOp);
Plan *plan = &node->plan;
- int numCols = list_length(distinctList);
+ int numCols = list_length(groupList);
int keyno = 0;
- AttrNumber *dupColIdx;
- Oid *dupOperators;
- Oid *dupCollations;
+ AttrNumber *cmpColIdx;
+ Oid *cmpOperators;
+ Oid *cmpCollations;
+ bool *cmpNullsFirst;
ListCell *slitem;
- plan->targetlist = lefttree->targetlist;
+ plan->targetlist = tlist;
plan->qual = NIL;
plan->lefttree = lefttree;
- plan->righttree = NULL;
+ plan->righttree = righttree;
/*
- * convert SortGroupClause list into arrays of attr indexes and equality
+ * convert SortGroupClause list into arrays of attr indexes and comparison
* operators, as wanted by executor
*/
- dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
- dupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
- dupCollations = (Oid *) palloc(sizeof(Oid) * numCols);
+ cmpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
+ cmpOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+ cmpCollations = (Oid *) palloc(sizeof(Oid) * numCols);
+ cmpNullsFirst = (bool *) palloc(sizeof(bool) * numCols);
- foreach(slitem, distinctList)
+ foreach(slitem, groupList)
{
SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem);
TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist);
- dupColIdx[keyno] = tle->resno;
- dupOperators[keyno] = sortcl->eqop;
- dupCollations[keyno] = exprCollation((Node *) tle->expr);
- Assert(OidIsValid(dupOperators[keyno]));
+ cmpColIdx[keyno] = tle->resno;
+ if (strategy == SETOP_HASHED)
+ cmpOperators[keyno] = sortcl->eqop;
+ else
+ cmpOperators[keyno] = sortcl->sortop;
+ Assert(OidIsValid(cmpOperators[keyno]));
+ cmpCollations[keyno] = exprCollation((Node *) tle->expr);
+ cmpNullsFirst[keyno] = sortcl->nulls_first;
keyno++;
}
node->cmd = cmd;
node->strategy = strategy;
node->numCols = numCols;
- node->dupColIdx = dupColIdx;
- node->dupOperators = dupOperators;
- node->dupCollations = dupCollations;
- node->flagColIdx = flagColIdx;
- node->firstFlag = firstFlag;
+ node->cmpColIdx = cmpColIdx;
+ node->cmpOperators = cmpOperators;
+ node->cmpCollations = cmpCollations;
+ node->cmpNullsFirst = cmpNullsFirst;
node->numGroups = numGroups;
return node;
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 9c3822f19a..1c43370005 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -65,7 +65,7 @@ static List *plan_union_children(PlannerInfo *root,
List **istrivial_tlist);
static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
- Path *input_path,
+ Path *lpath, Path *rpath,
double dNumGroups, double dNumOutputRows,
const char *construct);
static List *generate_setop_tlist(List *colTypes, List *colCollations,
@@ -315,8 +315,8 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
* to the corresponding tlist entries of the subplan. However, since
* the subplan was generated by generate_union_paths() or
* generate_nonunion_paths(), and hence its tlist was generated by
- * generate_append_tlist(), this will work. We just tell
- * generate_setop_tlist() to use varno 0.
+ * generate_append_tlist() or generate_setop_tlist(), this will work.
+ * We just tell generate_setop_tlist() to use varno 0.
*/
if (flag >= 0 ||
!tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
@@ -1028,29 +1028,27 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
*path;
List *lpath_tlist,
*rpath_tlist,
- *tlist_list,
*tlist,
- *groupList,
- *pathlist;
+ *groupList;
bool lpath_trivial_tlist,
- rpath_trivial_tlist;
+ rpath_trivial_tlist,
+ result_trivial_tlist;
double dLeftGroups,
dRightGroups,
dNumGroups,
dNumOutputRows;
bool use_hash;
SetOpCmd cmd;
- int firstFlag;
/*
* Tell children to fetch all tuples.
*/
root->tuple_fraction = 0.0;
- /* Recurse on children, ensuring their outputs are marked */
+ /* Recurse on children */
lrel = recurse_set_operations(op->larg, root,
op->colTypes, op->colCollations,
- false, 0,
+ false, -1,
refnames_tlist,
&lpath_tlist,
&lpath_trivial_tlist);
@@ -1060,10 +1058,9 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
else
dLeftGroups = lrel->rows;
- lpath = lrel->cheapest_total_path;
rrel = recurse_set_operations(op->rarg, root,
op->colTypes, op->colCollations,
- false, 1,
+ false, -1,
refnames_tlist,
&rpath_tlist,
&rpath_trivial_tlist);
@@ -1073,41 +1070,51 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
else
dRightGroups = rrel->rows;
- rpath = rrel->cheapest_total_path;
-
/* Undo effects of forcing tuple_fraction to 0 */
root->tuple_fraction = save_fraction;
/*
* For EXCEPT, we must put the left input first. For INTERSECT, either
* order should give the same results, and we prefer to put the smaller
- * input first in order to minimize the size of the hash table in the
- * hashing case. "Smaller" means the one with the fewer groups.
+ * input first in order to (a) minimize the size of the hash table in the
+ * hashing case, and (b) improve our chances of exploiting the executor's
+ * fast path for empty left-hand input. "Smaller" means the one with the
+ * fewer groups.
*/
- if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
- {
- pathlist = list_make2(lpath, rpath);
- tlist_list = list_make2(lpath_tlist, rpath_tlist);
- firstFlag = 0;
- }
- else
+ if (op->op != SETOP_EXCEPT && dLeftGroups > dRightGroups)
{
- pathlist = list_make2(rpath, lpath);
- tlist_list = list_make2(rpath_tlist, lpath_tlist);
- firstFlag = 1;
+ /* need to swap the two inputs */
+ RelOptInfo *tmprel;
+ List *tmplist;
+ double tmpd;
+
+ tmprel = lrel;
+ lrel = rrel;
+ rrel = tmprel;
+ tmplist = lpath_tlist;
+ lpath_tlist = rpath_tlist;
+ rpath_tlist = tmplist;
+ tmpd = dLeftGroups;
+ dLeftGroups = dRightGroups;
+ dRightGroups = tmpd;
}
+ lpath = lrel->cheapest_total_path;
+ rpath = rrel->cheapest_total_path;
+
/*
- * Generate tlist for Append plan node.
+ * Generate tlist for SetOp plan node.
*
- * The tlist for an Append plan isn't important as far as the Append is
+ * The tlist for a SetOp plan isn't important so far as the SetOp is
* concerned, but we must make it look real anyway for the benefit of the
- * next plan level up. In fact, it has to be real enough that the flag
- * column is shown as a variable not a constant, else setrefs.c will get
- * confused.
+ * next plan level up.
*/
- tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
- tlist_list, refnames_tlist);
+ tlist = generate_setop_tlist(op->colTypes, op->colCollations, -1,
+ 0, false, lpath_tlist, refnames_tlist,
+ &result_trivial_tlist);
+
+ /* We should not have needed any type coercions in the tlist */
+ Assert(result_trivial_tlist);
*pTargetList = tlist;
@@ -1116,12 +1123,6 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
bms_union(lrel->relids, rrel->relids));
result_rel->reltarget = create_pathtarget(root, tlist);
- /*
- * Append the child results together.
- */
- path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
- NIL, NULL, 0, false, -1);
-
/* Identify the grouping semantics */
groupList = generate_setop_grouplist(op, tlist);
@@ -1140,25 +1141,40 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
}
else
{
- dNumGroups = Min(dLeftGroups, dRightGroups);
+ dNumGroups = dLeftGroups;
dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
}
/*
- * Decide whether to hash or sort, and add a sort node if needed.
+ * Decide whether to hash or sort, and add sort nodes if needed.
*/
- use_hash = choose_hashed_setop(root, groupList, path,
+ use_hash = choose_hashed_setop(root, groupList, lpath, rpath,
dNumGroups, dNumOutputRows,
(op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
if (groupList && !use_hash)
- path = (Path *) create_sort_path(root,
- result_rel,
- path,
- make_pathkeys_for_sortclauses(root,
- groupList,
- tlist),
- -1.0);
+ {
+ List *pathkeys;
+
+ pathkeys = make_pathkeys_for_sortclauses(root,
+ groupList,
+ lpath_tlist);
+ if (!pathkeys_contained_in(pathkeys, lpath->pathkeys))
+ lpath = (Path *) create_sort_path(root,
+ lpath->parent,
+ lpath,
+ pathkeys,
+ -1.0);
+ pathkeys = make_pathkeys_for_sortclauses(root,
+ groupList,
+ rpath_tlist);
+ if (!pathkeys_contained_in(pathkeys, rpath->pathkeys))
+ rpath = (Path *) create_sort_path(root,
+ rpath->parent,
+ rpath,
+ pathkeys,
+ -1.0);
+ }
/*
* Finally, add a SetOp path node to generate the correct output.
@@ -1178,12 +1194,11 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
}
path = (Path *) create_setop_path(root,
result_rel,
- path,
+ lpath,
+ rpath,
cmd,
use_hash ? SETOP_HASHED : SETOP_SORTED,
groupList,
- list_length(op->colTypes) + 1,
- use_hash ? firstFlag : -1,
dNumGroups,
dNumOutputRows);
@@ -1285,10 +1300,13 @@ postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
/*
* choose_hashed_setop - should we use hashing for a set operation?
+ *
+ * XXX probably this should go away: just make both paths and let
+ * add_path sort it out.
*/
static bool
choose_hashed_setop(PlannerInfo *root, List *groupClauses,
- Path *input_path,
+ Path *lpath, Path *rpath,
double dNumGroups, double dNumOutputRows,
const char *construct)
{
@@ -1327,7 +1345,7 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
* Don't do it if it doesn't look like the hashtable will fit into
* hash_mem.
*/
- hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
+ hashentrysize = MAXALIGN(lpath->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
if (hashentrysize * dNumGroups > hash_mem_limit)
return false;
@@ -1336,9 +1354,9 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
* See if the estimated cost is no more than doing it the other way.
*
* We need to consider input_plan + hashagg versus input_plan + sort +
- * group. Note that the actual result plan might involve a SetOp or
- * Unique node, not Agg or Group, but the cost estimates for Agg and Group
- * should be close enough for our purposes here.
+ * group. XXX NOT TRUE: Note that the actual result plan might involve a
+ * SetOp or Unique node, not Agg or Group, but the cost estimates for Agg
+ * and Group should be close enough for our purposes here.
*
* These path variables are dummies that just hold cost fields; we don't
* make actual Paths for these steps.
@@ -1346,27 +1364,31 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
cost_agg(&hashed_p, root, AGG_HASHED, NULL,
numGroupCols, dNumGroups,
NIL,
- input_path->disabled_nodes,
- input_path->startup_cost, input_path->total_cost,
- input_path->rows, input_path->pathtarget->width);
+ lpath->disabled_nodes + rpath->disabled_nodes,
+ lpath->startup_cost + rpath->startup_cost,
+ lpath->total_cost + rpath->total_cost,
+ lpath->rows + rpath->rows,
+ lpath->pathtarget->width);
/*
- * Now for the sorted case. Note that the input is *always* unsorted,
- * since it was made by appending unrelated sub-relations together.
+ * Now for the sorted case. XXX NOT TRUE: Note that the input is *always*
+ * unsorted, since it was made by appending unrelated sub-relations
+ * together.
*/
- sorted_p.disabled_nodes = input_path->disabled_nodes;
- sorted_p.startup_cost = input_path->startup_cost;
- sorted_p.total_cost = input_path->total_cost;
+ sorted_p.disabled_nodes = lpath->disabled_nodes + rpath->disabled_nodes;
+ sorted_p.startup_cost = lpath->startup_cost + rpath->startup_cost;
+ sorted_p.total_cost = lpath->total_cost + rpath->total_cost;
/* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
cost_sort(&sorted_p, root, NIL, sorted_p.disabled_nodes,
sorted_p.total_cost,
- input_path->rows, input_path->pathtarget->width,
+ lpath->rows + rpath->rows,
+ lpath->pathtarget->width,
0.0, work_mem, -1.0);
cost_group(&sorted_p, root, numGroupCols, dNumGroups,
NIL,
sorted_p.disabled_nodes,
sorted_p.startup_cost, sorted_p.total_cost,
- input_path->rows);
+ lpath->rows + rpath->rows);
/*
* Now make the decision using the top-level tuple fraction. First we
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index fc97bf6ee2..e52e4b1d67 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -3634,25 +3634,26 @@ create_windowagg_path(PlannerInfo *root,
* Creates a pathnode that represents computation of INTERSECT or EXCEPT
*
* 'rel' is the parent relation associated with the result
- * 'subpath' is the path representing the source of data
+ * 'leftpath' is the path representing the left-hand source of data
+ * 'rightpath' is the path representing the right-hand source of data
* 'cmd' is the specific semantics (INTERSECT or EXCEPT, with/without ALL)
* 'strategy' is the implementation strategy (sorted or hashed)
- * 'distinctList' is a list of SortGroupClause's representing the grouping
- * 'flagColIdx' is the column number where the flag column will be, if any
- * 'firstFlag' is the flag value for the first input relation when hashing;
- * or -1 when sorting
- * 'numGroups' is the estimated number of distinct groups
+ * 'groupList' is a list of SortGroupClause's representing the grouping
+ * 'numGroups' is the estimated number of distinct groups in left-hand input
* 'outputRows' is the estimated number of output rows
+ *
+ * leftpath and rightpath must produce the same columns. Moreover, if
+ * strategy is SETOP_SORTED, leftpath and rightpath must both be sorted
+ * by all the grouping columns.
*/
SetOpPath *
create_setop_path(PlannerInfo *root,
RelOptInfo *rel,
- Path *subpath,
+ Path *leftpath,
+ Path *rightpath,
SetOpCmd cmd,
SetOpStrategy strategy,
- List *distinctList,
- AttrNumber flagColIdx,
- int firstFlag,
+ List *groupList,
double numGroups,
double outputRows)
{
@@ -3660,34 +3661,37 @@ create_setop_path(PlannerInfo *root,
pathnode->path.pathtype = T_SetOp;
pathnode->path.parent = rel;
- /* SetOp doesn't project, so use source path's pathtarget */
- pathnode->path.pathtarget = subpath->pathtarget;
+ pathnode->path.pathtarget = rel->reltarget;
/* For now, assume we are above any joins, so no parameterization */
pathnode->path.param_info = NULL;
pathnode->path.parallel_aware = false;
pathnode->path.parallel_safe = rel->consider_parallel &&
- subpath->parallel_safe;
- pathnode->path.parallel_workers = subpath->parallel_workers;
+ leftpath->parallel_safe && rightpath->parallel_safe;
+ pathnode->path.parallel_workers =
+ leftpath->parallel_workers + rightpath->parallel_workers;
/* SetOp preserves the input sort order if in sort mode */
pathnode->path.pathkeys =
- (strategy == SETOP_SORTED) ? subpath->pathkeys : NIL;
+ (strategy == SETOP_SORTED) ? leftpath->pathkeys : NIL;
- pathnode->subpath = subpath;
+ pathnode->leftpath = leftpath;
+ pathnode->rightpath = rightpath;
pathnode->cmd = cmd;
pathnode->strategy = strategy;
- pathnode->distinctList = distinctList;
- pathnode->flagColIdx = flagColIdx;
- pathnode->firstFlag = firstFlag;
+ pathnode->groupList = groupList;
pathnode->numGroups = numGroups;
/*
* Charge one cpu_operator_cost per comparison per input tuple. We assume
* all columns get compared at most of the tuples.
+ *
+ * XXX all wrong for hashing
*/
- pathnode->path.disabled_nodes = subpath->disabled_nodes;
- pathnode->path.startup_cost = subpath->startup_cost;
- pathnode->path.total_cost = subpath->total_cost +
- cpu_operator_cost * subpath->rows * list_length(distinctList);
+ pathnode->path.disabled_nodes =
+ leftpath->disabled_nodes + rightpath->disabled_nodes;
+ pathnode->path.startup_cost =
+ leftpath->startup_cost + rightpath->startup_cost;
+ pathnode->path.total_cost = leftpath->total_cost + rightpath->total_cost +
+ cpu_operator_cost * (leftpath->rows + rightpath->rows) * list_length(groupList);
pathnode->path.rows = outputRows;
return pathnode;
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 7f71b7625d..6ca7310fea 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -2805,27 +2805,34 @@ typedef struct HashState
/* ----------------
* SetOpState information
*
- * Even in "sorted" mode, SetOp nodes are more complex than a simple
- * Unique, since we have to count how many duplicates to return. But
- * we also support hashing, so this is really more like a cut-down
- * form of Agg.
+ * SetOp nodes support either sorted or hashed de-duplication.
+ * The sorted mode is a bit like MergeJoin, the hashed mode like Agg.
* ----------------
*/
-/* this struct is private in nodeSetOp.c: */
-typedef struct SetOpStatePerGroupData *SetOpStatePerGroup;
+typedef struct SetOpStatePerInput
+{
+ TupleTableSlot *firstTupleSlot; /* first tuple of current group */
+ int64 numTuples; /* number of tuples in current group */
+ TupleTableSlot *nextTupleSlot; /* next input tuple, if already read */
+ bool needGroup; /* do we need to load a new group? */
+} SetOpStatePerInput;
typedef struct SetOpState
{
PlanState ps; /* its first field is NodeTag */
- ExprState *eqfunction; /* equality comparator */
- Oid *eqfuncoids; /* per-grouping-field equality fns */
- FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
bool setop_done; /* indicates completion of output scan */
- long numOutput; /* number of dups left to output */
+ int64 numOutput; /* number of dups left to output */
+ int numCols; /* number of grouping columns */
+
/* these fields are used in SETOP_SORTED mode: */
- SetOpStatePerGroup pergroup; /* per-group working state */
- HeapTuple grp_firstTuple; /* copy of first tuple of current group */
+ SortSupport sortKeys; /* per-grouping-field sort data */
+ SetOpStatePerInput leftInput; /* current outer-relation input state */
+ SetOpStatePerInput rightInput; /* current inner-relation input state */
+ bool need_init; /* have we read the first tuples yet? */
+
/* these fields are used in SETOP_HASHED mode: */
+ Oid *eqfuncoids; /* per-grouping-field equality fns */
+ FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
TupleHashTable hashtable; /* hash table with one entry per group */
MemoryContext tableContext; /* memory context containing hash table */
bool table_filled; /* hash table filled yet? */
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 0759e00e96..58748d2ca6 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -2342,13 +2342,12 @@ typedef struct WindowAggPath
typedef struct SetOpPath
{
Path path;
- Path *subpath; /* path representing input source */
+ Path *leftpath; /* paths representing input sources */
+ Path *rightpath;
SetOpCmd cmd; /* what to do, see nodes.h */
SetOpStrategy strategy; /* how to do it, see nodes.h */
- List *distinctList; /* SortGroupClauses identifying target cols */
- AttrNumber flagColIdx; /* where is the flag column, if any */
- int firstFlag; /* flag value for first input relation */
- Cardinality numGroups; /* estimated number of groups in input */
+ List *groupList; /* SortGroupClauses identifying target cols */
+ Cardinality numGroups; /* estimated number of groups in left input */
} SetOpPath;
/*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 52f29bcdb6..4633121689 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -1225,23 +1225,20 @@ typedef struct SetOp
/* how to do it, see nodes.h */
SetOpStrategy strategy;
- /* number of columns to check for duplicate-ness */
+ /* number of columns to compare */
int numCols;
/* their indexes in the target list */
- AttrNumber *dupColIdx pg_node_attr(array_size(numCols));
+ AttrNumber *cmpColIdx pg_node_attr(array_size(numCols));
- /* equality operators to compare with */
- Oid *dupOperators pg_node_attr(array_size(numCols));
- Oid *dupCollations pg_node_attr(array_size(numCols));
-
- /* where is the flag column, if any */
- AttrNumber flagColIdx;
+ /* comparison operators (either equality operators or sort operators) */
+ Oid *cmpOperators pg_node_attr(array_size(numCols));
+ Oid *cmpCollations pg_node_attr(array_size(numCols));
- /* flag value for first input relation */
- int firstFlag;
+ /* nulls-first flags if sorting, otherwise not interesting */
+ bool *cmpNullsFirst pg_node_attr(array_size(numCols));
- /* estimated number of groups in input */
+ /* estimated number of groups in left input */
long numGroups;
} SetOp;
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 1035e6560c..5a6d0350c1 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -260,12 +260,11 @@ extern WindowAggPath *create_windowagg_path(PlannerInfo *root,
bool topwindow);
extern SetOpPath *create_setop_path(PlannerInfo *root,
RelOptInfo *rel,
- Path *subpath,
+ Path *leftpath,
+ Path *rightpath,
SetOpCmd cmd,
SetOpStrategy strategy,
- List *distinctList,
- AttrNumber flagColIdx,
- int firstFlag,
+ List *groupList,
double numGroups,
double outputRows);
extern RecursiveUnionPath *create_recursiveunion_path(PlannerInfo *root,
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 857ba1c40c..b4cd204de7 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1231,22 +1231,18 @@ select count(*) from
select * from onek i2 where i2.unique1 = o.unique2
) ss
where o.ten = 1;
- QUERY PLAN
-------------------------------------------------------------------------------
+ QUERY PLAN
+------------------------------------------------------------
Aggregate
-> Nested Loop
-> Seq Scan on onek o
Filter: (ten = 1)
- -> Subquery Scan on ss
- -> HashSetOp Except
- -> Append
- -> Subquery Scan on "*SELECT* 1"
- -> Index Scan using onek_unique1 on onek i1
- Index Cond: (unique1 = o.unique1)
- -> Subquery Scan on "*SELECT* 2"
- -> Index Scan using onek_unique1 on onek i2
- Index Cond: (unique1 = o.unique2)
-(13 rows)
+ -> HashSetOp Except
+ -> Index Scan using onek_unique1 on onek i1
+ Index Cond: (unique1 = o.unique1)
+ -> Index Scan using onek_unique1 on onek i2
+ Index Cond: (unique1 = o.unique2)
+(9 rows)
select count(*) from
onek o cross join lateral (
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index c73631a9a1..fbf24490e9 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -370,17 +370,13 @@ select count(*) from
explain (costs off)
select count(*) from
( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss;
- QUERY PLAN
-------------------------------------------------------------------------------------
+ QUERY PLAN
+------------------------------------------------------------------
Aggregate
- -> Subquery Scan on ss
- -> HashSetOp Intersect
- -> Append
- -> Subquery Scan on "*SELECT* 2"
- -> Seq Scan on tenk1
- -> Subquery Scan on "*SELECT* 1"
- -> Index Only Scan using tenk1_unique1 on tenk1 tenk1_1
-(8 rows)
+ -> HashSetOp Intersect
+ -> Seq Scan on tenk1
+ -> Index Only Scan using tenk1_unique1 on tenk1 tenk1_1
+(4 rows)
select count(*) from
( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss;
@@ -391,16 +387,13 @@ select count(*) from
explain (costs off)
select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
- QUERY PLAN
-------------------------------------------------------------------------
+ QUERY PLAN
+------------------------------------------------------------
HashSetOp Except
- -> Append
- -> Subquery Scan on "*SELECT* 1"
- -> Index Only Scan using tenk1_unique1 on tenk1
- -> Subquery Scan on "*SELECT* 2"
- -> Index Only Scan using tenk1_unique2 on tenk1 tenk1_1
- Filter: (unique2 <> 10)
-(7 rows)
+ -> Index Only Scan using tenk1_unique1 on tenk1
+ -> Index Only Scan using tenk1_unique2 on tenk1 tenk1_1
+ Filter: (unique2 <> 10)
+(4 rows)
select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
unique1
@@ -434,19 +427,17 @@ select count(*) from
explain (costs off)
select count(*) from
( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss;
- QUERY PLAN
-------------------------------------------------------------------------------------------
+ QUERY PLAN
+------------------------------------------------------------------------
Aggregate
- -> Subquery Scan on ss
- -> SetOp Intersect
- -> Sort
- Sort Key: "*SELECT* 2".fivethous
- -> Append
- -> Subquery Scan on "*SELECT* 2"
- -> Seq Scan on tenk1
- -> Subquery Scan on "*SELECT* 1"
- -> Index Only Scan using tenk1_unique1 on tenk1 tenk1_1
-(10 rows)
+ -> SetOp Intersect
+ -> Sort
+ Sort Key: tenk1.fivethous
+ -> Seq Scan on tenk1
+ -> Sort
+ Sort Key: tenk1_1.unique1
+ -> Index Only Scan using tenk1_unique1 on tenk1 tenk1_1
+(8 rows)
select count(*) from
( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss;
@@ -457,18 +448,17 @@ select count(*) from
explain (costs off)
select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
- QUERY PLAN
-------------------------------------------------------------------------------
+ QUERY PLAN
+------------------------------------------------------------------
SetOp Except
-> Sort
- Sort Key: "*SELECT* 1".unique1
- -> Append
- -> Subquery Scan on "*SELECT* 1"
- -> Index Only Scan using tenk1_unique1 on tenk1
- -> Subquery Scan on "*SELECT* 2"
- -> Index Only Scan using tenk1_unique2 on tenk1 tenk1_1
- Filter: (unique2 <> 10)
-(9 rows)
+ Sort Key: tenk1.unique1
+ -> Index Only Scan using tenk1_unique1 on tenk1
+ -> Sort
+ Sort Key: tenk1_1.unique2
+ -> Index Only Scan using tenk1_unique2 on tenk1 tenk1_1
+ Filter: (unique2 <> 10)
+(8 rows)
select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
unique1
@@ -528,15 +518,12 @@ select x from (values (array[1, 2]), (array[1, 3])) _(x) union select x from (va
explain (costs off)
select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from (values (array[1, 2]), (array[1, 4]))
_(x);
- QUERY PLAN
------------------------------------------------
+ QUERY PLAN
+-----------------------------------
HashSetOp Intersect
- -> Append
- -> Subquery Scan on "*SELECT* 1"
- -> Values Scan on "*VALUES*"
- -> Subquery Scan on "*SELECT* 2"
- -> Values Scan on "*VALUES*_1"
-(6 rows)
+ -> Values Scan on "*VALUES*"
+ -> Values Scan on "*VALUES*_1"
+(3 rows)
select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from (values (array[1, 2]), (array[1, 4]))
_(x);
x
@@ -546,15 +533,12 @@ select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from
explain (costs off)
select x from (values (array[1, 2]), (array[1, 3])) _(x) except select x from (values (array[1, 2]), (array[1, 4]))
_(x);
- QUERY PLAN
------------------------------------------------
+ QUERY PLAN
+-----------------------------------
HashSetOp Except
- -> Append
- -> Subquery Scan on "*SELECT* 1"
- -> Values Scan on "*VALUES*"
- -> Subquery Scan on "*SELECT* 2"
- -> Values Scan on "*VALUES*_1"
-(6 rows)
+ -> Values Scan on "*VALUES*"
+ -> Values Scan on "*VALUES*_1"
+(3 rows)
select x from (values (array[1, 2]), (array[1, 3])) _(x) except select x from (values (array[1, 2]), (array[1, 4]))
_(x);
x
@@ -606,17 +590,16 @@ select x from (values (array[1, 2]), (array[1, 3])) _(x) union select x from (va
explain (costs off)
select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from (values (array[1, 2]), (array[1, 4]))
_(x);
- QUERY PLAN
------------------------------------------------------
+ QUERY PLAN
+-----------------------------------------
SetOp Intersect
-> Sort
- Sort Key: "*SELECT* 1".x
- -> Append
- -> Subquery Scan on "*SELECT* 1"
- -> Values Scan on "*VALUES*"
- -> Subquery Scan on "*SELECT* 2"
- -> Values Scan on "*VALUES*_1"
-(8 rows)
+ Sort Key: "*VALUES*".column1
+ -> Values Scan on "*VALUES*"
+ -> Sort
+ Sort Key: "*VALUES*_1".column1
+ -> Values Scan on "*VALUES*_1"
+(7 rows)
select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from (values (array[1, 2]), (array[1, 4]))
_(x);
x
@@ -626,17 +609,16 @@ select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from
explain (costs off)
select x from (values (array[1, 2]), (array[1, 3])) _(x) except select x from (values (array[1, 2]), (array[1, 4]))
_(x);
- QUERY PLAN
------------------------------------------------------
+ QUERY PLAN
+-----------------------------------------
SetOp Except
-> Sort
- Sort Key: "*SELECT* 1".x
- -> Append
- -> Subquery Scan on "*SELECT* 1"
- -> Values Scan on "*VALUES*"
- -> Subquery Scan on "*SELECT* 2"
- -> Values Scan on "*VALUES*_1"
-(8 rows)
+ Sort Key: "*VALUES*".column1
+ -> Values Scan on "*VALUES*"
+ -> Sort
+ Sort Key: "*VALUES*_1".column1
+ -> Values Scan on "*VALUES*_1"
+(7 rows)
select x from (values (array[1, 2]), (array[1, 3])) _(x) except select x from (values (array[1, 2]), (array[1, 4]))
_(x);
x
@@ -669,17 +651,16 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values
explain (costs off)
select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
- QUERY PLAN
------------------------------------------------------
+ QUERY PLAN
+-----------------------------------------
SetOp Intersect
-> Sort
- Sort Key: "*SELECT* 1".x
- -> Append
- -> Subquery Scan on "*SELECT* 1"
- -> Values Scan on "*VALUES*"
- -> Subquery Scan on "*SELECT* 2"
- -> Values Scan on "*VALUES*_1"
-(8 rows)
+ Sort Key: "*VALUES*".column1
+ -> Values Scan on "*VALUES*"
+ -> Sort
+ Sort Key: "*VALUES*_1".column1
+ -> Values Scan on "*VALUES*_1"
+(7 rows)
select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
x
@@ -689,17 +670,16 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (va
explain (costs off)
select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
- QUERY PLAN
------------------------------------------------------
+ QUERY PLAN
+-----------------------------------------
SetOp Except
-> Sort
- Sort Key: "*SELECT* 1".x
- -> Append
- -> Subquery Scan on "*SELECT* 1"
- -> Values Scan on "*VALUES*"
- -> Subquery Scan on "*SELECT* 2"
- -> Values Scan on "*VALUES*_1"
-(8 rows)
+ Sort Key: "*VALUES*".column1
+ -> Values Scan on "*VALUES*"
+ -> Sort
+ Sort Key: "*VALUES*_1".column1
+ -> Values Scan on "*VALUES*_1"
+(7 rows)
select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
x
@@ -777,17 +757,16 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values
explain (costs off)
select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
- QUERY PLAN
------------------------------------------------------
+ QUERY PLAN
+-----------------------------------------
SetOp Intersect
-> Sort
- Sort Key: "*SELECT* 1".x
- -> Append
- -> Subquery Scan on "*SELECT* 1"
- -> Values Scan on "*VALUES*"
- -> Subquery Scan on "*SELECT* 2"
- -> Values Scan on "*VALUES*_1"
-(8 rows)
+ Sort Key: "*VALUES*".column1
+ -> Values Scan on "*VALUES*"
+ -> Sort
+ Sort Key: "*VALUES*_1".column1
+ -> Values Scan on "*VALUES*_1"
+(7 rows)
select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
x
@@ -797,17 +776,16 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (va
explain (costs off)
select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
- QUERY PLAN
------------------------------------------------------
+ QUERY PLAN
+-----------------------------------------
SetOp Except
-> Sort
- Sort Key: "*SELECT* 1".x
- -> Append
- -> Subquery Scan on "*SELECT* 1"
- -> Values Scan on "*VALUES*"
- -> Subquery Scan on "*SELECT* 2"
- -> Values Scan on "*VALUES*_1"
-(8 rows)
+ Sort Key: "*VALUES*".column1
+ -> Values Scan on "*VALUES*"
+ -> Sort
+ Sort Key: "*VALUES*_1".column1
+ -> Values Scan on "*VALUES*_1"
+(7 rows)
select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
x
@@ -970,15 +948,12 @@ set enable_sort = false;
-- no means to disable Unique.
explain (costs off)
select from generate_series(1,5) intersect select from generate_series(1,3);
- QUERY PLAN
-----------------------------------------------------------------------
+ QUERY PLAN
+----------------------------------------------------------
HashSetOp Intersect
- -> Append
- -> Subquery Scan on "*SELECT* 1"
- -> Function Scan on generate_series
- -> Subquery Scan on "*SELECT* 2"
- -> Function Scan on generate_series generate_series_1
-(6 rows)
+ -> Function Scan on generate_series
+ -> Function Scan on generate_series generate_series_1
+(3 rows)
select from generate_series(1,5) union all select from generate_series(1,3);
--
@@ -1015,15 +990,12 @@ select from generate_series(1,5) union select from generate_series(1,3);
explain (costs off)
select from generate_series(1,5) intersect select from generate_series(1,3);
- QUERY PLAN
-----------------------------------------------------------------------
+ QUERY PLAN
+----------------------------------------------------------
SetOp Intersect
- -> Append
- -> Subquery Scan on "*SELECT* 1"
- -> Function Scan on generate_series
- -> Subquery Scan on "*SELECT* 2"
- -> Function Scan on generate_series generate_series_1
-(6 rows)
+ -> Function Scan on generate_series
+ -> Function Scan on generate_series generate_series_1
+(3 rows)
select from generate_series(1,5) union select from generate_series(1,3);
--
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index ce33e55bf1..3bd71b78be 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2612,6 +2612,8 @@ SetOpCmd
SetOpPath
SetOpState
SetOpStatePerGroup
+SetOpStatePerGroupData
+SetOpStatePerInput
SetOpStrategy
SetOperation
SetOperationStmt