From 6e247a3f2c2479db8d959fd0b90c0dfa94817454 Mon Sep 17 00:00:00 2001
From: James Coleman
Date: Mon, 23 Mar 2020 21:59:32 -0400
Subject: [PATCH v40 4/7] fix lots of comments

---
 src/backend/commands/explain.c             |  24 +-
 src/backend/executor/nodeIncrementalSort.c | 275 +++++++++++++--------
 src/backend/optimizer/path/costsize.c      |   2 +-
 src/backend/optimizer/path/pathkeys.c      |  12 +-
 src/backend/optimizer/plan/planner.c       |   4 +-
 src/backend/utils/sort/tuplesort.c         |  11 +-
 src/include/nodes/execnodes.h              |   4 +
 7 files changed, 201 insertions(+), 131 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 0256dd42f1..cf8cfd31f5 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -2703,7 +2703,14 @@ show_sort_info(SortState *sortstate, ExplainState *es)
 	}
 }
 
-
+/*
+ * Incremental sort nodes sort in (a potentially very large number of) batches,
+ * so EXPLAIN ANALYZE needs to roll up the tuplesort stats from each batch into
+ * an intelligible summary.
+ *
+ * This function is used for both a non-parallel node and each worker in a
+ * parallel incremental sort node.
+ */
 static void
 show_incremental_sort_group_info(IncrementalSortGroupInfo *groupInfo,
 								 const char *groupLabel, ExplainState *es)
@@ -2769,7 +2776,7 @@ show_incremental_sort_group_info(IncrementalSortGroupInfo *groupInfo,
 		{
 			const char *sortMethodName = tuplesort_method_name(methodCell->int_value);
 
-			methodNames = lappend(methodNames, sortMethodName);
+			methodNames = lappend(methodNames, unconstify(char *, sortMethodName));
 		}
 		ExplainPropertyList("Sort Methods Used", methodNames, es);
 
@@ -2831,16 +2838,9 @@ show_incremental_sort_info(IncrementalSortState *incrsortstate,
 				&incrsortstate->shared_info->sinfo[n];
 
 			/*
-			 * XXX: The previous version of the patch chcked:
-			 * fullsort_instrument->sortMethod == SORT_TYPE_STILL_IN_PROGRESS
-			 * and continued if the condition was true (with the comment
-			 * "ignore any unfilled slots"). I'm not convinced that makes
-			 * sense since the same sort instrument can have been used
-			 * multiple times, so the last time it being used being still in
-			 * progress, doesn't seem to be relevant. Instead I'm now checking
-			 * to see if the group count for each group info is 0. If both are
-			 * 0, then we exclude the worker since it didn't contribute
-			 * anything meaningful.
+			 * If a worker hasn't processed any sort groups at all, then
+			 * exclude it from output since it either didn't launch or didn't
+			 * contribute anything meaningful.
 			 */
 			fullsortGroupInfo = &incsort_info->fullsortGroupInfo;
 			prefixsortGroupInfo = &incsort_info->prefixsortGroupInfo;
diff --git a/src/backend/executor/nodeIncrementalSort.c b/src/backend/executor/nodeIncrementalSort.c
index 32ce05a63c..296a0c0675 100644
--- a/src/backend/executor/nodeIncrementalSort.c
+++ b/src/backend/executor/nodeIncrementalSort.c
@@ -68,6 +68,14 @@
 #include "utils/lsyscache.h"
 #include "utils/tuplesort.h"
 
+/* ----------------------------------------------------------------
+ *		instrumentSortedGroup
+ *
+ * Because incremental sort processes (potentially many) sort batches, we need
+ * to capture tuplesort stats each time we finalize a sort state. This summary
+ * data is later used for EXPLAIN ANALYZE output.
+ * ----------------------------------------------------------------
+ */
 static void
 instrumentSortedGroup(PlanState *pstate, IncrementalSortGroupInfo *groupInfo,
 					  Tuplesortstate *sortState)
@@ -78,6 +86,8 @@ instrumentSortedGroup(PlanState *pstate, IncrementalSortGroupInfo *groupInfo,
 	groupInfo->groupCount++;
 
 	tuplesort_get_stats(sortState, &sort_instr);
+
+	/* Calculate total and maximum memory and disk space used. */
 	switch (sort_instr.spaceType)
 	{
 		case SORT_SPACE_TYPE_DISK:
@@ -94,6 +104,7 @@ instrumentSortedGroup(PlanState *pstate, IncrementalSortGroupInfo *groupInfo,
 			break;
 	}
 
+	/* Track each sort method we've used. */
 	if (!list_member_int(groupInfo->sortMethods, sort_instr.sortMethod))
 		groupInfo->sortMethods = lappend_int(groupInfo->sortMethods,
 											 sort_instr.sortMethod);
@@ -109,8 +120,11 @@ instrumentSortedGroup(PlanState *pstate, IncrementalSortGroupInfo *groupInfo,
 	}
 }
 
-/*
- * Prepare information for presorted_keys comparison.
+/* ----------------------------------------------------------------
+ *		preparePresortedCols
+ *
+ * Prepare information for presorted_keys comparisons.
+ * ----------------------------------------------------------------
  */
 static void
 preparePresortedCols(IncrementalSortState *node)
@@ -153,11 +167,12 @@ preparePresortedCols(IncrementalSortState *node)
 	}
 }
 
-/*
- * Check whether a given tuple belongs to the current sort group.
+/* ----------------------------------------------------------------
+ *		isCurrentGroup
  *
- * We do this by comparing its first 'presortedCols' column values to
- * the pivot tuple of the current group.
+ * Check whether a given tuple belongs to the current sort group by comparing
+ * the presorted column values to the pivot tuple of the current group.
+ * ----------------------------------------------------------------
  */
 static bool
 isCurrentGroup(IncrementalSortState *node, TupleTableSlot *pivot, TupleTableSlot *tuple)
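The prefix-key membership test can be modeled standalone. Below is a minimal sketch with invented names and plain int keys (the real isCurrentGroup compares datums through the comparison functions prepared by preparePresortedCols and also handles NULLs):

#include <stdbool.h>
#include <stdio.h>

static bool
is_current_group(const int *pivot, const int *tuple, int presorted_cols)
{
	/* Compare only the leading, already-sorted key columns. */
	for (int i = 0; i < presorted_cols; i++)
	{
		if (pivot[i] != tuple[i])
			return false;
	}
	return true;
}

int
main(void)
{
	int		pivot[] = {1, 1, 7};
	int		same[] = {1, 1, 9};		/* differs only in a suffix key */
	int		other[] = {1, 2, 7};	/* differs in a presorted key */

	/* prints "1 0": only the first tuple stays in the current group */
	printf("%d %d\n",
		   is_current_group(pivot, same, 2),
		   is_current_group(pivot, other, 2));
	return 0;
}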
@@ -214,8 +229,8 @@ isCurrentGroup(IncrementalSortState *node, TupleTableSlot *pivot, TupleTableSlot
 	return true;
 }
 
-/*
- * Switch to presorted prefix mode.
+/* ----------------------------------------------------------------
+ *		switchToPresortedPrefixMode
  *
  * When we determine that we've likely encountered a large batch of tuples all
  * having the same presorted prefix values, we want to optimize tuplesort by
 * only sorting on unsorted suffix keys.
 *
 * The problem is that we've already accumulated several tuples in another
 * tuplesort configured to sort by all columns (assuming that there may be
 * more than one prefix key group). So to switch to presorted prefix mode we
- * have to go back an look at all the tuples we've already accumulated and
+ * have to go back and look at all the tuples we've already accumulated to
 * verify they're all part of the same prefix key group before sorting them
 * solely by unsorted suffix keys.
 *
 * While it's likely that all the already-fetched tuples are part of a single
 * prefix group, we also have to handle the possibility that there is at least
 * one different prefix key group before the large prefix key group.
+ * ----------------------------------------------------------------
 */
 static void
 switchToPresortedPrefixMode(PlanState *pstate)
@@ -248,6 +264,7 @@ switchToPresortedPrefixMode(PlanState *pstate)
 	outerNode = outerPlanState(node);
 	tupDesc = ExecGetResultType(outerNode);
 
+	/* Configure the prefix sort state the first time around. */
 	if (node->prefixsort_state == NULL)
 	{
 		Tuplesortstate *prefixsort_state;
@@ -287,6 +304,10 @@ switchToPresortedPrefixMode(PlanState *pstate)
 										node->bound - node->bound_Done);
 	}
 
+	/*
+	 * Copy as many tuples as we can (i.e., in the same prefix key group) from
+	 * the full sort state to the prefix sort state.
+	 */
 	for (;;)
 	{
 		lastTuple = node->n_fullsort_remaining - nTuples == 1;
@@ -326,7 +347,7 @@ switchToPresortedPrefixMode(PlanState *pstate)
 		{
 			/*
 			 * The tuple isn't part of the current batch so we need to
-			 * carry it over into the next set up tuples we transfer out
+			 * carry it over into the next batch of tuples we transfer out
 			 * of the full sort tuplesort into the presorted prefix
 			 * tuplesort. We don't actually have to do anything special to
 			 * save the tuple since we've already loaded it into the
@@ -344,12 +365,14 @@ switchToPresortedPrefixMode(PlanState *pstate)
 
 		firstTuple = false;
 
+		/*
+		 * If we've copied all of the tuples from the full sort state into the
+		 * prefix sort state, then we don't actually know that we've yet found
+		 * the last tuple in that prefix key group until we check the next
+		 * tuple from the outer plan node, so we retain the current group
+		 * pivot tuple for that prefix key group comparison.
+		 */
 		if (lastTuple)
-
-			/*
-			 * We retain the current group pivot tuple since we haven't yet
-			 * found the end of the current prefix key group.
-			 */
 			break;
 	}
 
@@ -387,9 +410,9 @@ switchToPresortedPrefixMode(PlanState *pstate)
 	{
 		/*
 		 * We finished a group but didn't consume all of the tuples from the
-		 * full sort batch sorter, so we'll sort this batch, let the inner
-		 * node read out all of those tuples, and then come back around to
-		 * find another batch.
+		 * full sort state, so we'll sort this batch, let the parent node read
+		 * out all of those tuples, and then come back around to find another
+		 * batch.
 		 */
 		SO1_printf("Sorting presorted prefix tuplesort with %ld tuples\n", nTuples);
 		tuplesort_performsort(node->prefixsort_state);
@@ -402,7 +425,7 @@ switchToPresortedPrefixMode(PlanState *pstate)
 		if (node->bounded)
 		{
 			/*
-			 * If the current node has a bound, and we've already sorted n
+			 * If the current node has a bound and we've already sorted n
 			 * tuples, then the functional bound remaining is (original bound
 			 * - n), so store the current number of processed tuples for use
 			 * in configuring sorting bound.
 			 */
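The bound bookkeeping described in the hunk above reduces to simple arithmetic; a tiny standalone check with invented numbers, mirroring the tuplesort_set_bound(node->prefixsort_state, node->bound - node->bound_Done) call earlier in this function:

#include <stdio.h>

int
main(void)
{
	/* e.g. LIMIT 100: the node's overall bound */
	long	bound = 100;
	/* tuples already returned by previously sorted batches */
	long	bound_Done = 30;

	/* the next batch's tuplesort only needs the smallest 70 tuples */
	printf("next batch bound: %ld\n", bound - bound_Done);
	return 0;
}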
@@ -490,6 +513,11 @@ ExecIncrementalSort(PlanState *pstate)
 	dir = estate->es_direction;
 	fullsort_state = node->fullsort_state;
 
+	/*
+	 * If a previous iteration has sorted a batch, then we need to check to see
+	 * if there are any remaining tuples in that batch that we can return before
+	 * moving on to other execution states.
+	 */
 	if (node->execution_status == INCSORT_READFULLSORT ||
 		node->execution_status == INCSORT_READPREFIXSORT)
 	{
 		/*
 		 * We have to populate the slot from the tuplesort before checking
-		 * outerNodeDone because it will NULL the slot if no more tuples
+		 * outerNodeDone because it will set the slot to NULL if no more tuples
 		 * remain. If the tuplesort is empty, but we don't have any more
-		 * tuples avaialable for sort from the outer node, then outerNodeDone
-		 * will have been set so we'll return the empty slot to the caller.
+		 * tuples available for sort from the outer node, then outerNodeDone
+		 * will have been set so we'll return that now-empty slot to the caller.
 		 */
 		if (tuplesort_gettupleslot(read_sortstate, ScanDirectionIsForward(dir),
 								   false, slot, NULL) || node->outerNodeDone)
 
 			/*
-			 * TODO: there isn't a good test case for the node->outerNodeDone
-			 * case directly, but lots of other stuff fails if it's not there.
-			 * If the outer node will fail when trying to fetch too many
-			 * tuples, then things break if this check isn't here.
+			 * Note: there isn't a good test case for the node->outerNodeDone
+			 * check directly, but we need it for any plan where the outer node
+			 * will fail when trying to fetch too many tuples.
 			 */
 			return slot;
 		else if (node->n_fullsort_remaining > 0)
 		{
 			/*
@@ -524,10 +551,11 @@ ExecIncrementalSort(PlanState *pstate)
 			 * accumulated at least one additional prefix key group in the
 			 * full sort tuplesort. The first call to
 			 * switchToPresortedPrefixMode() will have pulled the first one of
-			 * those groups out, and we've returned those tuples to the inner
-			 * node, but if we tuples remaining in that tuplesort (i.e.,
-			 * n_fullsort_remaining > 0) at this point we need to do that
-			 * again.
+			 * those groups out, and we've returned those tuples to the parent
+			 * node, but if at this point we still have tuples remaining in the
+			 * full sort state (i.e., n_fullsort_remaining > 0), then we need to
+			 * re-execute the prefix mode transition function to pull out the
+			 * next prefix key group.
 			 */
 			SO1_printf("Re-calling switchToPresortedPrefixMode() because n_fullsort_remaining is > 0 (%ld)\n",
 					   node->n_fullsort_remaining);
 			switchToPresortedPrefixMode(pstate);
@@ -536,10 +564,10 @@ ExecIncrementalSort(PlanState *pstate)
 		else
 		{
 			/*
-			 * If we don't have any already sorted tuples to read, and we're
-			 * not in the middle of transitioning into presorted prefix sort
-			 * mode, then it's time to start the process all over again by
-			 * building new full sort group.
+			 * If we don't have any sorted tuples to read and we're not
+			 * currently transitioning into presorted prefix sort mode, then
+			 * it's time to start the process all over again by building a new
+			 * group in the full sort state.
 			 */
 			SO_printf("Setting execution_status to INCSORT_LOADFULLSORT (n_fullsort_remaining > 0)\n");
 			node->execution_status = INCSORT_LOADFULLSORT;
@@ -547,24 +575,26 @@ ExecIncrementalSort(PlanState *pstate)
 	}
 
 	/*
-	 * Want to scan subplan in the forward direction while creating the sorted
-	 * data.
+	 * Scan the subplan in the forward direction while creating the sorted data.
 	 */
 	estate->es_direction = ForwardScanDirection;
 
 	outerNode = outerPlanState(node);
 	tupDesc = ExecGetResultType(outerNode);
 
+	/* Load tuples into the full sort state. */
 	if (node->execution_status == INCSORT_LOADFULLSORT)
 	{
 		/*
-		 * Initialize tuplesort module (only needed before the first group).
+		 * Initialize sorting structures.
 		 */
 		if (fullsort_state == NULL)
 		{
 			/*
 			 * Initialize presorted column support structures for
-			 * isCurrentGroup().
+			 * isCurrentGroup(). It's correct to do this along with the initial
+			 * initialization for the full sort state (and not for the prefix
+			 * sort state) since we always load the full sort state first.
 			 */
 			preparePresortedCols(node);
 
 			/*
@@ -572,8 +602,7 @@ ExecIncrementalSort(PlanState *pstate)
 			 * Since we optimize small prefix key groups by accumulating a
 			 * minimum number of tuples before sorting, we can't assume that a
 			 * group of tuples all have the same prefix key values. Hence we
-			 * setup the full sort tuplesort to sort by all requested sort
-			 * columns.
+			 * set up the full sort tuplesort to sort by all requested sort keys.
 			 */
 			fullsort_state = tuplesort_begin_heap(tupDesc,
 												  plannode->sort.numCols,
@@ -588,13 +617,13 @@ ExecIncrementalSort(PlanState *pstate)
 		}
 		else
 		{
-			/* Reset sort for a new prefix key group. */
+			/* Reset sort for the next batch. */
 			tuplesort_reset(fullsort_state);
 		}
 
 		/*
-		 * Calculate the remaining tuples left if the bounded and configure
-		 * both bounded sort and the minimum group size accordingly.
+		 * Calculate the remaining tuples if bounded and configure both
+		 * bounded sort and the minimum group size accordingly.
 		 */
 		if (node->bounded)
 		{
@@ -616,9 +645,9 @@ ExecIncrementalSort(PlanState *pstate)
 
 		/*
 		 * Because we have to read the next tuple to find out that we've
-		 * encountered a new prefix key group on subsequent groups we have to
+		 * encountered a new prefix key group, on subsequent groups we have to
 		 * carry over that extra tuple and add it to the new group's sort
-		 * here.
+		 * here before we read any new tuples from the outer node.
 		 */
 		if (!TupIsNull(node->group_pivot))
 		{
@@ -636,6 +665,11 @@ ExecIncrementalSort(PlanState *pstate)
 			ExecClearTuple(node->group_pivot);
 		}
 
+
+	/*
+	 * Pull as many tuples from the outer node as possible given our current
+	 * operating mode.
+	 */
 	for (;;)
 	{
 		/*
@@ -646,11 +680,16 @@ ExecIncrementalSort(PlanState *pstate)
 		slot = ExecProcNode(outerNode);
 
 		/*
-		 * When the outer node can't provide us any more tuples, then we
-		 * can sort the current group and return those tuples.
+		 * If the outer node can't provide us any more tuples, then we can
+		 * sort the current group and return those tuples.
 		 */
 		if (TupIsNull(slot))
 		{
+			/*
+			 * We need to know later if the outer node has completed to be
+			 * able to distinguish between being done with a batch and
+			 * being done with the whole node.
+			 */
 			node->outerNodeDone = true;
 
 			SO1_printf("Sorting fullsort with %ld tuples\n", nTuples);
@@ -670,25 +709,31 @@ ExecIncrementalSort(PlanState *pstate)
 		if (nTuples < minGroupSize)
 		{
 			/*
-			 * If we have yet hit our target minimum group size, then
-			 * don't both with checking for inclusion in the current
-			 * prefix group since a large number of very tiny sorts is
-			 * inefficient.
+			 * If we haven't yet hit our target minimum group size, then we
+			 * don't need to bother checking for inclusion in the current
+			 * prefix group since at this point we assume we'll full sort
+			 * this batch to avoid a large number of very tiny (and thus
+			 * inefficient) sorts.
 			 */
 			tuplesort_puttupleslot(fullsort_state, slot);
 			nTuples++;
 
-			/* Keep the last tuple of our minimal group as a pivot. */
+			/*
+			 * If we've reached our minimum group size, then we need to store
+			 * the most recent tuple as a pivot.
+			 */
 			if (nTuples == minGroupSize)
 				ExecCopySlot(node->group_pivot, slot);
 		}
 		else
 		{
 			/*
-			 * Once we've accumulated a minimum number of tuples, we start
-			 * checking for a new prefix key group. Only after we find
-			 * changed prefix keys can we guarantee sort stability of the
-			 * tuples we've already accumulated.
+			 * If we've already accumulated enough tuples to reach our
+			 * minimum group size, then we need to compare any additional
+			 * tuples to our pivot tuple to see if we've reached the end of
+			 * that prefix key group. Only after we find changed prefix keys
+			 * can we guarantee sort stability of the tuples we've already
+			 * accumulated.
 			 */
 			if (isCurrentGroup(node, node->group_pivot, slot))
 			{
@@ -703,11 +748,10 @@ ExecIncrementalSort(PlanState *pstate)
 			{
 				/*
 				 * Since the tuple we fetched isn't part of the current
-				 * prefix key group we can't sort it as part of this sort
-				 * group. Instead we need to carry it over to the next
-				 * group. We use the group_pivot slot as a temp container
-				 * for that purpose even though we won't actually treat it
-				 * as a group pivot.
+				 * Since the tuple we fetched isn't part of the current
+				 * prefix key group we don't want to sort it as part of
+				 * the current batch. Instead we use the group_pivot slot to
+				 * carry it over to the next batch (even though we won't
+				 * actually treat it as a group pivot).
 				 */
 				ExecCopySlot(node->group_pivot, slot);
 
@@ -717,8 +761,8 @@ ExecIncrementalSort(PlanState *pstate)
 					 * If the current node has a bound, and we've already
 					 * sorted n tuples, then the functional bound
 					 * remaining is (original bound - n), so store the
-					 * current number of processed tuples for use in
-					 * configuring sorting bound.
+					 * current number of processed tuples for later use in
+					 * configuring the sort state's bound.
 					 */
 					SO2_printf("Changing bound_Done from %ld to %ld\n",
 							   node->bound_Done,
@@ -728,7 +772,8 @@ ExecIncrementalSort(PlanState *pstate)
 
 					/*
 					 * Once we find changed prefix keys we can complete the
-					 * sort and begin reading out the sorted tuples.
+					 * sort and transition modes to reading out the sorted
+					 * tuples.
 					 */
 					SO1_printf("Sorting fullsort tuplesort with %ld tuples\n",
 							   nTuples);
@@ -746,18 +791,24 @@ ExecIncrementalSort(PlanState *pstate)
 		}
 
 		/*
-		 * Once we've processed DEFAULT_MAX_FULL_SORT_GROUP_SIZE tuples
-		 * then we make the assumption that it's likely that we've found a
-		 * large group of tuples having a single prefix key (as long as
-		 * the last tuple didn't shift us into reading from the full sort
-		 * mode tuplesort).
+		 * Unless we've already transitioned modes to reading from the full
+		 * sort state, we assume that having read at least
+		 * DEFAULT_MAX_FULL_SORT_GROUP_SIZE tuples means it's likely we're
+		 * processing a large group of tuples all having equal prefix keys
+		 * (but haven't yet found the final tuple in that prefix key group),
+		 * so we need to transition into presorted prefix mode.
 		 */
 		if (nTuples > DEFAULT_MAX_FULL_SORT_GROUP_SIZE &&
 			node->execution_status != INCSORT_READFULLSORT)
 		{
 			/*
 			 * The group pivot we have stored has already been put into
-			 * the tuplesort; we don't want to carry it over.
+			 * the tuplesort; we don't want to carry it over. Since we
+			 * haven't yet found the end of the prefix key group, it might
+			 * seem like we should keep this, but we don't actually know
+			 * how many prefix key groups might be represented in the full
+			 * sort state, so we'll let the mode transition function manage
+			 * this state for us.
 			 */
 			ExecClearTuple(node->group_pivot);
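The batching policy in the loop above can be sketched standalone. A runnable toy with invented names and a plain int prefix key (the real code loads a tuplesort, tracks bounds, and may instead switch into presorted prefix mode once a group exceeds DEFAULT_MAX_FULL_SORT_GROUP_SIZE):

#include <stdio.h>

#define MIN_GROUP_SIZE 3

int
main(void)
{
	/* prefix-key values of tuples arriving from the outer node */
	int		stream[] = {1, 1, 2, 2, 2, 2, 3};
	int		n = sizeof(stream) / sizeof(stream[0]);
	int		nTuples = 0;
	int		pivot = 0;

	for (int i = 0; i < n; i++)
	{
		if (nTuples < MIN_GROUP_SIZE)
		{
			/* below the floor: load without any group checks */
			nTuples++;
			if (nTuples == MIN_GROUP_SIZE)
				pivot = stream[i];	/* remember the pivot's prefix key */
		}
		else if (stream[i] == pivot)
			nTuples++;			/* same prefix-key group: keep loading */
		else
		{
			/* prefix key changed: sort this batch, carry the tuple over */
			printf("sort and emit a batch of %d tuples\n", nTuples);
			nTuples = 1;
			pivot = stream[i];	/* carried-over tuple seeds the next batch */
		}
	}
	if (nTuples > 0)
		printf("sort and emit a final batch of %d tuples\n", nTuples);
	return 0;
}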
@@ -783,7 +834,7 @@ ExecIncrementalSort(PlanState *pstate)
 			 * haven't yet completed fetching the current prefix key group
 			 * because the tuples we've "lost" already sorted "below" the
 			 * retained ones, and we're already contractually guaranteed
-			 * to not need any more than the currentBount tuples.
+			 * to not need any more than the currentBound tuples.
 			 */
 			if (tuplesort_used_bound(node->fullsort_state))
 			{
@@ -798,10 +849,9 @@ ExecIncrementalSort(PlanState *pstate)
 						   nTuples);
 
 				/*
-				 * Track the number of tuples we need to move from the
-				 * fullsort to presorted prefix sort (we might have multiple
-				 * prefix key groups, so we need a way to see if we've
-				 * actually finished).
+				 * We might have multiple prefix key groups in the full sort
+				 * state, so the mode transition function needs to know how
+				 * many tuples it needs to move from the fullsort to presorted
+				 * prefix sort.
 				 */
 				node->n_fullsort_remaining = nTuples;
 
@@ -828,50 +878,62 @@ ExecIncrementalSort(PlanState *pstate)
 	if (node->execution_status == INCSORT_LOADPREFIXSORT)
 	{
 		/*
-		 * Since we only enter this state after determining that all remaining
-		 * tuples in the full sort tuplesort have the same prefix, we've
-		 * already established a current group pivot tuple (but wasn't carried
-		 * over; it's already been put into the prefix sort tuplesort).
+		 * We only enter this state after the mode transition function has
+		 * confirmed all remaining tuples from the full sort state have the same
+		 * prefix and moved those tuples to the prefix sort state. That
+		 * function has also set a group pivot tuple (which doesn't need to be
+		 * carried over; it's already been put into the prefix sort state).
 		 */
 		Assert(!TupIsNull(node->group_pivot));
 
+		/*
+		 * Read tuples from the outer node and load them into the prefix sort
+		 * state until we encounter a tuple whose prefix keys don't match the
+		 * current group_pivot tuple, since we can't guarantee sort stability
+		 * until we have all tuples matching those prefix keys.
+		 */
 		for (;;)
 		{
 			slot = ExecProcNode(outerNode);
 
-			/* Check to see if there are no more tuples to fetch. */
+			/*
+			 * If we've exhausted tuples from the outer node we're done loading
+			 * the prefix sort state.
+			 */
 			if (TupIsNull(slot))
 			{
+				/*
+				 * We need to know later if the outer node has completed to be
+				 * able to distinguish between being done with a batch and
+				 * being done with the whole node.
+				 */
 				node->outerNodeDone = true;
 				break;
 			}
 
+			/*
+			 * If the tuple's prefix keys match our pivot tuple, we're not done
+			 * yet and can load it into the prefix sort state. If not, we don't
+			 * want to sort it as part of the current batch. Instead we use the
+			 * group_pivot slot to carry it over to the next batch (even though
+			 * we won't actually treat it as a group pivot).
+			 */
 			if (isCurrentGroup(node, node->group_pivot, slot))
 			{
-				/*
-				 * Fetch tuples and put them into the presorted prefix
-				 * tuplesort until we find changed prefix keys. Only then can
-				 * we guarantee sort stability of the tuples we've already
-				 * accumulated.
-				 */
 				tuplesort_puttupleslot(node->prefixsort_state, slot);
 				nTuples++;
 			}
 			else
 			{
-				/*
-				 * Since the tuple we fetched isn't part of the current prefix
-				 * key group we can't sort it as part of this sort group.
-				 * Instead we need to carry it over to the next group. We use
-				 * the group_pivot slot as a temp container for that purpose
-				 * even though we won't actually treat it as a group pivot.
-				 */
				ExecCopySlot(node->group_pivot, slot);
 				break;
 			}
 		}
 
-		/* Perform the sort and return the tuples to the inner plan nodes. */
+		/*
+		 * Perform the sort and begin returning the tuples to the parent plan
+		 * node.
+		 */
 		SO1_printf("Sorting presorted prefix tuplesort with >= %ld tuples\n", nTuples);
 		tuplesort_performsort(node->prefixsort_state);
@@ -935,16 +997,14 @@ ExecInitIncrementalSort(IncrementalSort *node, EState *estate, int eflags)
 
 	/*
 	 * Incremental sort can't be used with either EXEC_FLAG_REWIND,
-	 * EXEC_FLAG_BACKWARD or EXEC_FLAG_MARK, because we hold only current
-	 * bucket in tuplesortstate.
+	 * EXEC_FLAG_BACKWARD or EXEC_FLAG_MARK, because we hold only one of many
+	 * sort batches in the current sort state.
 	 */
 	Assert((eflags & (EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)) == 0);
 
-	/*
-	 * create state structure
-	 */
+	/* Initialize state structure. */
 	incrsortstate = makeNode(IncrementalSortState);
 	incrsortstate->ss.ps.plan = (Plan *) node;
 	incrsortstate->ss.ps.state = estate;
@@ -990,7 +1050,7 @@ ExecInitIncrementalSort(IncrementalSort *node, EState *estate, int eflags)
 	 */
 
 	/*
-	 * initialize child nodes
+	 * Initialize child nodes.
 	 *
 	 * We shield the child node from the need to support REWIND, BACKWARD, or
 	 * MARK/RESTORE.
@@ -1006,12 +1066,15 @@ ExecInitIncrementalSort(IncrementalSort *node, EState *estate, int eflags)
 
 	/*
 	 * Initialize return slot and type. No need to initialize projection info
-	 * because this node doesn't do projections.
+	 * because we don't do any projections.
 	 */
 	ExecInitResultTupleSlotTL(&incrsortstate->ss.ps, &TTSOpsMinimalTuple);
 	incrsortstate->ss.ps.ps_ProjInfo = NULL;
 
-	/* make standalone slot to store previous tuple from outer node */
+	/*
+	 * Initialize standalone slots to store a tuple for pivot prefix keys and
+	 * for carrying over a tuple from one batch to the next.
+	 */
 	incrsortstate->group_pivot =
 		MakeSingleTupleTableSlot(ExecGetResultType(outerPlanState(incrsortstate)),
 								 &TTSOpsMinimalTuple);
@@ -1033,13 +1096,11 @@ ExecEndIncrementalSort(IncrementalSortState *node)
 {
 	SO_printf("ExecEndIncrementalSort: shutting down sort node\n");
 
-	/*
-	 * clean out the tuple table
-	 */
+	/* clean out the scan tuple */
 	ExecClearTuple(node->ss.ss_ScanTupleSlot);
 	/* must drop pointer to sort result tuple */
 	ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
-	/* must drop stanalone tuple slot from outer node */
+	/* must drop standalone tuple slots from outer node */
 	ExecDropSingleTupleTableSlot(node->group_pivot);
 	ExecDropSingleTupleTableSlot(node->transfer_tuple);
@@ -1086,6 +1147,8 @@ ExecReScanIncrementalSort(IncrementalSortState *node)
 	node->outerNodeDone = false;
 
 	/*
+	 * XXX: This is suspect.
+	 *
 	 * If subnode is to be rescanned then we forget previous sort results; we
 	 * have to re-read the subplan and re-sort. Also must re-sort if the
 	 * bounded-sort parameters changed or we didn't select randomAccess.
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 8efbb660b9..a59926fa02 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1802,7 +1802,7 @@ cost_full_sort(Cost *startup_cost, Cost *run_cost,
 /*
  * cost_incremental_sort
  *	Determines and returns the cost of sorting a relation incrementally, when
- *	the input path is already sorted by some of the pathkeys.
+ *	the input path is presorted by a prefix of the pathkeys.
  *
  * 'presorted_keys' is the number of leading pathkeys by which the input path
 * is sorted.
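The pathkeys.c change below replaces all-or-nothing matching with a shared-prefix count; a toy model with plain int keys standing in for PathKey lists (the real code uses pathkeys_common_contained_in):

#include <stdio.h>

static int
common_prefix_length(const int *query_keys, int nquery,
					 const int *path_keys, int npath)
{
	int		n = 0;

	while (n < nquery && n < npath && query_keys[n] == path_keys[n])
		n++;
	return n;
}

int
main(void)
{
	int		order_by[] = {1, 2, 3};	/* stands in for ORDER BY a, b, c */
	int		sorted_by[] = {1, 2};	/* input path already sorted by a, b */

	/*
	 * All-or-nothing logic would score this 0; with incremental sort the
	 * two-key prefix is still useful, so the count (2) is returned.
	 */
	printf("%d\n", common_prefix_length(order_by, 3, sorted_by, 2));
	return 0;
}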
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 74799cd8fd..be569f56fd 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -1831,9 +1831,10 @@ right_merge_direction(PlannerInfo *root, PathKey *pathkey)
  * Count the number of pathkeys that are useful for meeting the
  * query's requested output ordering.
  *
- * Unlike merge pathkeys, this is an all-or-nothing affair: it does us
- * no good to order by just the first key(s) of the requested ordering.
- * So the result is always either 0 or list_length(root->query_pathkeys).
+ * Because we have the possibility of incremental sort, a prefix list of
+ * keys is potentially useful for improving the performance of the requested
+ * ordering. Thus we return 0 if no useful keys are found, or the number of
+ * leading keys shared by the list and the requested ordering.
 */
 static int
 pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
@@ -1849,11 +1850,6 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	(void) pathkeys_common_contained_in(root->query_pathkeys, pathkeys,
 										&n_common_pathkeys);
 
-	/*
-	 * Return the number of path keys in common, or 0 if there are none. Any
-	 * leading common pathkeys could be useful for ordering because we can use
-	 * the incremental sort.
-	 */
 	return n_common_pathkeys;
 }
 
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index c194263cf8..423ac25827 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -4922,8 +4922,8 @@ create_distinct_paths(PlannerInfo *root,
 * Build a new upperrel containing Paths for ORDER BY evaluation.
 *
 * All paths in the result must satisfy the ORDER BY ordering.
- * The only new paths we need consider is an explicit full or
- * incremental sort on the cheapest-total existing path.
+ * The only new paths we need consider are an explicit full sort
+ * and incremental sort on the cheapest-total existing path.
 *
 * input_rel: contains the source-data Paths
 * target: the output tlist the result Paths must emit
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 77c15ebd78..99d64a88af 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -804,7 +804,11 @@ tuplesort_begin_common(int workMem, SortCoordinate coordinate,
 }
 
 /*
- * XXX Missing comment.
+ * tuplesort_begin_batch
+ *
+ * Set up, or reset, all state needed for processing a new set of tuples with
+ * this sort state. Called both from tuplesort_begin_common (the first time
+ * sorting with this sort state) and tuplesort_reset (for subsequent usages).
 */
 static void
 tuplesort_begin_batch(Tuplesortstate *state)
@@ -1291,8 +1295,11 @@ tuplesort_set_bound(Tuplesortstate *state, int64 bound)
 		state->sortKeys->abbrev_full_comparator = NULL;
 	}
 }
+
 /*
- * XXX Missing comment.
+ * tuplesort_used_bound
+ *
+ * Allow callers to find out if the sort state was able to use a bound.
 */
 bool
 tuplesort_used_bound(Tuplesortstate *state)
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index e6a9b67675..71ac1417ab 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -2023,6 +2023,10 @@ typedef struct SortState
 	SharedSortInfo *shared_info;	/* one entry per worker */
 } SortState;
 
+/* ----------------
+ *	 Instrumentation information for IncrementalSort
+ * ----------------
+ */
 typedef struct IncrementalSortGroupInfo
 {
 	int64		groupCount;
-- 
2.17.1
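For context on the tuplesort_begin_batch/tuplesort_reset comments above: the reuse pattern they describe can be sketched with a toy sort state in plain C (qsort standing in for the tuplesort machinery; the point is load, sort, read out, then reset, keeping allocations across batches):

#include <stdio.h>
#include <stdlib.h>

/* A tiny reusable "sort state": load a batch, sort it, read it out, reset. */
typedef struct BatchSorter
{
	int		vals[8];
	int		n;
} BatchSorter;

static int
cmp_int(const void *a, const void *b)
{
	return *(const int *) a - *(const int *) b;
}

static void
sorter_reset(BatchSorter *s)
{
	/* drop the contents but keep the (here: fixed) allocation around */
	s->n = 0;
}

int
main(void)
{
	BatchSorter sorter = {{0}, 0};
	int		batches[2][3] = {{3, 1, 2}, {9, 7, 8}};

	for (int b = 0; b < 2; b++)
	{
		for (int i = 0; i < 3; i++)
			sorter.vals[sorter.n++] = batches[b][i];	/* "puttuple" */
		qsort(sorter.vals, sorter.n, sizeof(int), cmp_int);	/* "performsort" */
		for (int i = 0; i < sorter.n; i++)
			printf("%d ", sorter.vals[i]);	/* "gettuple" loop */
		printf("\n");
		sorter_reset(&sorter);	/* analogous to tuplesort_reset */
	}
	return 0;
}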