From 6e247a3f2c2479db8d959fd0b90c0dfa94817454 Mon Sep 17 00:00:00 2001
From: James Coleman
Date: Mon, 23 Mar 2020 21:59:32 -0400
Subject: [PATCH v40 4/7] fix lots of comments

---
 src/backend/commands/explain.c             |  24 +-
 src/backend/executor/nodeIncrementalSort.c | 275 +++++++++++++--------
 src/backend/optimizer/path/costsize.c      |   2 +-
 src/backend/optimizer/path/pathkeys.c      |  12 +-
 src/backend/optimizer/plan/planner.c       |   4 +-
 src/backend/utils/sort/tuplesort.c         |  11 +-
 src/include/nodes/execnodes.h              |   4 +
 7 files changed, 201 insertions(+), 131 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 0256dd42f1..cf8cfd31f5 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -2703,7 +2703,14 @@ show_sort_info(SortState *sortstate, ExplainState *es)
 	}
 }
 
-
+/*
+ * Incremental sort nodes sort in (a potentially very large number of) batches,
+ * so EXPLAIN ANALYZE needs to roll up the tuplesort stats from each batch into
+ * an intelligible summary.
+ *
+ * This function is used for both a non-parallel node and each worker in a
+ * parallel incremental sort node.
+ */
 static void
 show_incremental_sort_group_info(IncrementalSortGroupInfo *groupInfo,
 								 const char *groupLabel, ExplainState *es)
@@ -2769,7 +2776,7 @@ show_incremental_sort_group_info(IncrementalSortGroupInfo *groupInfo,
 		{
 			const char *sortMethodName = tuplesort_method_name(methodCell->int_value);
 
-			methodNames = lappend(methodNames, sortMethodName);
+			methodNames = lappend(methodNames, unconstify(char *, sortMethodName));
 		}
 		ExplainPropertyList("Sort Methods Used", methodNames, es);
 
@@ -2831,16 +2838,9 @@ show_incremental_sort_info(IncrementalSortState *incrsortstate,
 				&incrsortstate->shared_info->sinfo[n];
 
 			/*
-			 * XXX: The previous version of the patch chcked:
-			 * fullsort_instrument->sortMethod == SORT_TYPE_STILL_IN_PROGRESS
-			 * and continued if the condition was true (with the comment
-			 * "ignore any unfilled slots"). I'm not convinced that makes
-			 * sense since the same sort instrument can have been used
-			 * multiple times, so the last time it being used being still in
-			 * progress, doesn't seem to be relevant. Instead I'm now checking
-			 * to see if the group count for each group info is 0. If both are
-			 * 0, then we exclude the worker since it didn't contribute
-			 * anything meaningful.
+			 * If a worker hasn't processed any sort groups at all, then
+			 * exclude it from output since it either didn't launch or didn't
+			 * contribute anything meaningful.
 			 */
 			fullsortGroupInfo = &incsort_info->fullsortGroupInfo;
 			prefixsortGroupInfo = &incsort_info->prefixsortGroupInfo;
diff --git a/src/backend/executor/nodeIncrementalSort.c b/src/backend/executor/nodeIncrementalSort.c
index 32ce05a63c..296a0c0675 100644
--- a/src/backend/executor/nodeIncrementalSort.c
+++ b/src/backend/executor/nodeIncrementalSort.c
@@ -68,6 +68,14 @@
 #include "utils/lsyscache.h"
 #include "utils/tuplesort.h"
 
+/* ----------------------------------------------------------------
+ *		instrumentSortedGroup
+ *
+ * Because incremental sort processes (potentially many) sort batches, we need
+ * to capture tuplesort stats each time we finalize a sort state. This summary
+ * data is later used for EXPLAIN ANALYZE output.
+ * ----------------------------------------------------------------
+ */
 static void
 instrumentSortedGroup(PlanState *pstate, IncrementalSortGroupInfo *groupInfo,
 					  Tuplesortstate *sortState)
@@ -78,6 +86,8 @@ instrumentSortedGroup(PlanState *pstate, IncrementalSortGroupInfo *groupInfo,
 	groupInfo->groupCount++;
 
 	tuplesort_get_stats(sortState, &sort_instr);
+
+	/* Calculate total and maximum memory and disk space used. */
 	switch (sort_instr.spaceType)
 	{
 		case SORT_SPACE_TYPE_DISK:
@@ -94,6 +104,7 @@ instrumentSortedGroup(PlanState *pstate, IncrementalSortGroupInfo *groupInfo,
 			break;
 	}
 
+	/* Track each sort method we've used. */
 	if (!list_member_int(groupInfo->sortMethods, sort_instr.sortMethod))
 		groupInfo->sortMethods = lappend_int(groupInfo->sortMethods,
 											 sort_instr.sortMethod);
@@ -109,8 +120,11 @@ instrumentSortedGroup(PlanState *pstate, IncrementalSortGroupInfo *groupInfo,
 	}
 }
 
-/*
- * Prepare information for presorted_keys comparison.
+/* ----------------------------------------------------------------
+ *		preparePresortedCols
+ *
+ * Prepare information for presorted_keys comparisons.
+ * ----------------------------------------------------------------
  */
 static void
 preparePresortedCols(IncrementalSortState *node)
@@ -153,11 +167,12 @@ preparePresortedCols(IncrementalSortState *node)
 	}
 }
 
-/*
- * Check whether a given tuple belongs to the current sort group.
+/* ----------------------------------------------------------------
+ *		isCurrentGroup
  *
- * We do this by comparing its first 'presortedCols' column values to
- * the pivot tuple of the current group.
+ * Check whether a given tuple belongs to the current sort group by comparing
+ * the presorted column values to the pivot tuple of the current group.
+ * ----------------------------------------------------------------
  */
 static bool
 isCurrentGroup(IncrementalSortState *node, TupleTableSlot *pivot, TupleTableSlot *tuple)
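The prefix-key membership test can be modeled standalone. Below is a minimal sketch with invented names and plain int keys (the real isCurrentGroup compares datums through the comparison functions prepared by preparePresortedCols and also handles NULLs):

#include <stdbool.h>
#include <stdio.h>

static bool
is_current_group(const int *pivot, const int *tuple, int presorted_cols)
{
	/* Compare only the leading, already-sorted key columns. */
	for (int i = 0; i < presorted_cols; i++)
	{
		if (pivot[i] != tuple[i])
			return false;
	}
	return true;
}

int
main(void)
{
	int		pivot[] = {1, 1, 7};
	int		same[] = {1, 1, 9};		/* differs only in a suffix key */
	int		other[] = {1, 2, 7};	/* differs in a presorted key */

	/* prints "1 0": only the first tuple stays in the current group */
	printf("%d %d\n",
		   is_current_group(pivot, same, 2),
		   is_current_group(pivot, other, 2));
	return 0;
}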
@@ -214,8 +229,8 @@ isCurrentGroup(IncrementalSortState *node, TupleTableSlot *pivot, TupleTableSlot
 	return true;
 }
 
-/*
- * Switch to presorted prefix mode.
+/* ----------------------------------------------------------------
+ *		switchToPresortedPrefixMode
  *
  * When we determine that we've likely encountered a large batch of tuples all
  * having the same presorted prefix values, we want to optimize tuplesort by
 * only sorting on unsorted suffix keys.
 *
 * The problem is that we've already accumulated several tuples in another
 * tuplesort configured to sort by all columns (assuming that there may be
 * more than one prefix key group). So to switch to presorted prefix mode we
- * have to go back an look at all the tuples we've already accumulated and
+ * have to go back and look at all the tuples we've already accumulated to
 * verify they're all part of the same prefix key group before sorting them
 * solely by unsorted suffix keys.
 *
 * While it's likely that all the already-fetched tuples are part of a single
 * prefix group, we also have to handle the possibility that there is at least
 * one different prefix key group before the large prefix key group.
+ * ----------------------------------------------------------------
 */
 static void
 switchToPresortedPrefixMode(PlanState *pstate)
@@ -248,6 +264,7 @@ switchToPresortedPrefixMode(PlanState *pstate)
 	outerNode = outerPlanState(node);
 	tupDesc = ExecGetResultType(outerNode);
 
+	/* Configure the prefix sort state the first time around. */
 	if (node->prefixsort_state == NULL)
 	{
 		Tuplesortstate *prefixsort_state;
@@ -287,6 +304,10 @@ switchToPresortedPrefixMode(PlanState *pstate)
 										node->bound - node->bound_Done);
 	}
 
+	/*
+	 * Copy as many tuples as we can (i.e., in the same prefix key group) from
+	 * the full sort state to the prefix sort state.
+	 */
 	for (;;)
 	{
 		lastTuple = node->n_fullsort_remaining - nTuples == 1;
@@ -326,7 +347,7 @@ switchToPresortedPrefixMode(PlanState *pstate)
 		{
 			/*
 			 * The tuple isn't part of the current batch so we need to
-			 * carry it over into the next set up tuples we transfer out
+			 * carry it over into the next batch of tuples we transfer out
 			 * of the full sort tuplesort into the presorted prefix
 			 * tuplesort. We don't actually have to do anything special to
 			 * save the tuple since we've already loaded it into the
@@ -344,12 +365,14 @@ switchToPresortedPrefixMode(PlanState *pstate)
 
 		firstTuple = false;
 
+		/*
+		 * If we've copied all of the tuples from the full sort state into the
+		 * prefix sort state, then we don't actually know that we've yet found
+		 * the last tuple in that prefix key group until we check the next
+		 * tuple from the outer plan node, so we retain the current group
+		 * pivot tuple for that prefix key group comparison.
+		 */
 		if (lastTuple)
-
-			/*
-			 * We retain the current group pivot tuple since we haven't yet
-			 * found the end of the current prefix key group.
-			 */
 			break;
 	}
 
@@ -387,9 +410,9 @@ switchToPresortedPrefixMode(PlanState *pstate)
 	{
 		/*
 		 * We finished a group but didn't consume all of the tuples from the
-		 * full sort batch sorter, so we'll sort this batch, let the inner
-		 * node read out all of those tuples, and then come back around to
-		 * find another batch.
+		 * full sort state, so we'll sort this batch, let the parent node read
+		 * out all of those tuples, and then come back around to find another
+		 * batch.
 		 */
 		SO1_printf("Sorting presorted prefix tuplesort with %ld tuples\n", nTuples);
 		tuplesort_performsort(node->prefixsort_state);
@@ -402,7 +425,7 @@ switchToPresortedPrefixMode(PlanState *pstate)
 		if (node->bounded)
 		{
 			/*
-			 * If the current node has a bound, and we've already sorted n
+			 * If the current node has a bound and we've already sorted n
 			 * tuples, then the functional bound remaining is (original bound
 			 * - n), so store the current number of processed tuples for use
 			 * in configuring sorting bound.
 			 */
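The bound bookkeeping described in the hunk above reduces to simple arithmetic; a tiny standalone check with invented numbers, mirroring the tuplesort_set_bound(node->prefixsort_state, node->bound - node->bound_Done) call earlier in this function:

#include <stdio.h>

int
main(void)
{
	/* e.g. LIMIT 100: the node's overall bound */
	long	bound = 100;
	/* tuples already returned by previously sorted batches */
	long	bound_Done = 30;

	/* the next batch's tuplesort only needs the smallest 70 tuples */
	printf("next batch bound: %ld\n", bound - bound_Done);
	return 0;
}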
@@ -490,6 +513,11 @@ ExecIncrementalSort(PlanState *pstate)
 	dir = estate->es_direction;
 	fullsort_state = node->fullsort_state;
 
+	/*
+	 * If a previous iteration has sorted a batch, then we need to check to see
+	 * if there are any remaining tuples in that batch that we can return before
+	 * moving on to other execution states.
+	 */
 	if (node->execution_status == INCSORT_READFULLSORT ||
 		node->execution_status == INCSORT_READPREFIXSORT)
 	{
 		/*
 		 * We have to populate the slot from the tuplesort before checking
-		 * outerNodeDone because it will NULL the slot if no more tuples
+		 * outerNodeDone because it will set the slot to NULL if no more tuples
 		 * remain. If the tuplesort is empty, but we don't have any more
-		 * tuples avaialable for sort from the outer node, then outerNodeDone
-		 * will have been set so we'll return the empty slot to the caller.
+		 * tuples available for sort from the outer node, then outerNodeDone
+		 * will have been set so we'll return that now-empty slot to the caller.
 		 */
 		if (tuplesort_gettupleslot(read_sortstate, ScanDirectionIsForward(dir),
 								   false, slot, NULL) || node->outerNodeDone)
 
 			/*
-			 * TODO: there isn't a good test case for the node->outerNodeDone
-			 * case directly, but lots of other stuff fails if it's not there.
-			 * If the outer node will fail when trying to fetch too many
-			 * tuples, then things break if this check isn't here.
+			 * Note: there isn't a good test case for the node->outerNodeDone
+			 * check directly, but we need it for any plan where the outer node
+			 * will fail when trying to fetch too many tuples.
 			 */
 			return slot;
 		else if (node->n_fullsort_remaining > 0)
 		{
 			/*
@@ -524,10 +551,11 @@ ExecIncrementalSort(PlanState *pstate)
 			 * accumulated at least one additional prefix key group in the
 			 * full sort tuplesort. The first call to
 			 * switchToPresortedPrefixMode() will have pulled the first one of
-			 * those groups out, and we've returned those tuples to the inner
-			 * node, but if we tuples remaining in that tuplesort (i.e.,
-			 * n_fullsort_remaining > 0) at this point we need to do that
-			 * again.
+			 * those groups out, and we've returned those tuples to the parent
+			 * node, but if at this point we still have tuples remaining in the
+			 * full sort state (i.e., n_fullsort_remaining > 0), then we need to
+			 * re-execute the prefix mode transition function to pull out the
+			 * next prefix key group.
 			 */
 			SO1_printf("Re-calling switchToPresortedPrefixMode() because n_fullsort_remaining is > 0 (%ld)\n",
 					   node->n_fullsort_remaining);
 			switchToPresortedPrefixMode(pstate);
@@ -536,10 +564,10 @@ ExecIncrementalSort(PlanState *pstate)
 		else
 		{
 			/*
-			 * If we don't have any already sorted tuples to read, and we're
-			 * not in the middle of transitioning into presorted prefix sort
-			 * mode, then it's time to start the process all over again by
-			 * building new full sort group.
+			 * If we don't have any sorted tuples to read and we're not
+			 * currently transitioning into presorted prefix sort mode, then
+			 * it's time to start the process all over again by building a new
+			 * group in the full sort state.
 			 */
 			SO_printf("Setting execution_status to INCSORT_LOADFULLSORT (n_fullsort_remaining > 0)\n");
 			node->execution_status = INCSORT_LOADFULLSORT;
@@ -547,24 +575,26 @@ ExecIncrementalSort(PlanState *pstate)
 	}
 
 	/*
-	 * Want to scan subplan in the forward direction while creating the sorted
-	 * data.
+	 * Scan the subplan in the forward direction while creating the sorted data.
 	 */
 	estate->es_direction = ForwardScanDirection;
 
 	outerNode = outerPlanState(node);
 	tupDesc = ExecGetResultType(outerNode);
 
+	/* Load tuples into the full sort state. */
 	if (node->execution_status == INCSORT_LOADFULLSORT)
 	{
 		/*
-		 * Initialize tuplesort module (only needed before the first group).
+		 * Initialize sorting structures.
 		 */
 		if (fullsort_state == NULL)
 		{
 			/*
 			 * Initialize presorted column support structures for
-			 * isCurrentGroup().
+			 * isCurrentGroup(). It's correct to do this along with the initial
+			 * initialization for the full sort state (and not for the prefix
+			 * sort state) since we always load the full sort state first.
 			 */
 			preparePresortedCols(node);
 
 			/*
@@ -572,8 +602,7 @@ ExecIncrementalSort(PlanState *pstate)
 			 * Since we optimize small prefix key groups by accumulating a
 			 * minimum number of tuples before sorting, we can't assume that a
 			 * group of tuples all have the same prefix key values. Hence we
-			 * setup the full sort tuplesort to sort by all requested sort
-			 * columns.
+			 * set up the full sort tuplesort to sort by all requested sort keys.
 			 */
 			fullsort_state = tuplesort_begin_heap(tupDesc,
 												  plannode->sort.numCols,
@@ -588,13 +617,13 @@ ExecIncrementalSort(PlanState *pstate)
 		}
 		else
 		{
-			/* Reset sort for a new prefix key group. */
+			/* Reset sort for the next batch. */
 			tuplesort_reset(fullsort_state);
 		}
 
 		/*
-		 * Calculate the remaining tuples left if the bounded and configure
-		 * both bounded sort and the minimum group size accordingly.
+		 * Calculate the remaining tuples if bounded and configure both
+		 * bounded sort and the minimum group size accordingly.
 		 */
 		if (node->bounded)
 		{
@@ -616,9 +645,9 @@ ExecIncrementalSort(PlanState *pstate)
 
 		/*
 		 * Because we have to read the next tuple to find out that we've
-		 * encountered a new prefix key group on subsequent groups we have to
+		 * encountered a new prefix key group, on subsequent groups we have to
 		 * carry over that extra tuple and add it to the new group's sort
-		 * here.
+		 * here before we read any new tuples from the outer node.
 		 */
 		if (!TupIsNull(node->group_pivot))
 		{
@@ -636,6 +665,11 @@ ExecIncrementalSort(PlanState *pstate)
 			ExecClearTuple(node->group_pivot);
 		}
 
+
+	/*
+	 * Pull as many tuples from the outer node as possible given our current
+	 * operating mode.
+	 */
 	for (;;)
 	{
 		/*
@@ -646,11 +680,16 @@ ExecIncrementalSort(PlanState *pstate)
 		slot = ExecProcNode(outerNode);
 
 		/*
-		 * When the outer node can't provide us any more tuples, then we
-		 * can sort the current group and return those tuples.
+		 * If the outer node can't provide us any more tuples, then we can
+		 * sort the current group and return those tuples.
 		 */
 		if (TupIsNull(slot))
 		{
+			/*
+			 * We need to know later if the outer node has completed to be
+			 * able to distinguish between being done with a batch and
+			 * being done with the whole node.
+			 */
 			node->outerNodeDone = true;
 
 			SO1_printf("Sorting fullsort with %ld tuples\n", nTuples);
@@ -670,25 +709,31 @@ ExecIncrementalSort(PlanState *pstate)
 		if (nTuples < minGroupSize)
 		{
 			/*
-			 * If we have yet hit our target minimum group size, then
-			 * don't both with checking for inclusion in the current
-			 * prefix group since a large number of very tiny sorts is
-			 * inefficient.
+			 * If we haven't yet hit our target minimum group size, then we
+			 * don't need to bother checking for inclusion in the current
+			 * prefix group since at this point we assume we'll full sort
+			 * this batch to avoid a large number of very tiny (and thus
+			 * inefficient) sorts.
 			 */
 			tuplesort_puttupleslot(fullsort_state, slot);
 			nTuples++;
 
-			/* Keep the last tuple of our minimal group as a pivot. */
+			/*
+			 * If we've reached our minimum group size, then we need to store
+			 * the most recent tuple as a pivot.
+			 */
 			if (nTuples == minGroupSize)
 				ExecCopySlot(node->group_pivot, slot);
 		}
 		else
 		{
 			/*
-			 * Once we've accumulated a minimum number of tuples, we start
-			 * checking for a new prefix key group. Only after we find
-			 * changed prefix keys can we guarantee sort stability of the
-			 * tuples we've already accumulated.
+			 * If we've already accumulated enough tuples to reach our
+			 * minimum group size, then we need to compare any additional
+			 * tuples to our pivot tuple to see if we've reached the end of
+			 * that prefix key group. Only after we find changed prefix keys
+			 * can we guarantee sort stability of the tuples we've already
+			 * accumulated.
 			 */
 			if (isCurrentGroup(node, node->group_pivot, slot))
 			{
@@ -703,11 +748,10 @@ ExecIncrementalSort(PlanState *pstate)
 			{
 				/*
 				 * Since the tuple we fetched isn't part of the current
-				 * prefix key group we can't sort it as part of this sort
-				 * group. Instead we need to carry it over to the next
-				 * group. We use the group_pivot slot as a temp container
-				 * for that purpose even though we won't actually treat it
-				 * as a group pivot.
+				 * Since the tuple we fetched isn't part of the current
+				 * prefix key group we don't want to sort it as part of
+				 * the current batch. Instead we use the group_pivot slot to
+				 * carry it over to the next batch (even though we won't
+				 * actually treat it as a group pivot).
 				 */
 				ExecCopySlot(node->group_pivot, slot);
 
@@ -717,8 +761,8 @@ ExecIncrementalSort(PlanState *pstate)
 					 * If the current node has a bound, and we've already
 					 * sorted n tuples, then the functional bound
 					 * remaining is (original bound - n), so store the
-					 * current number of processed tuples for use in
-					 * configuring sorting bound.
+					 * current number of processed tuples for later use in
+					 * configuring the sort state's bound.
 					 */
 					SO2_printf("Changing bound_Done from %ld to %ld\n",
 							   node->bound_Done,
@@ -728,7 +772,8 @@ ExecIncrementalSort(PlanState *pstate)
 
 					/*
 					 * Once we find changed prefix keys we can complete the
-					 * sort and begin reading out the sorted tuples.
+					 * sort and transition modes to reading out the sorted
+					 * tuples.
 					 */
 					SO1_printf("Sorting fullsort tuplesort with %ld tuples\n",
 							   nTuples);
@@ -746,18 +791,24 @@ ExecIncrementalSort(PlanState *pstate)
 		}
 
 		/*
-		 * Once we've processed DEFAULT_MAX_FULL_SORT_GROUP_SIZE tuples
-		 * then we make the assumption that it's likely that we've found a
-		 * large group of tuples having a single prefix key (as long as
-		 * the last tuple didn't shift us into reading from the full sort
-		 * mode tuplesort).
+		 * Unless we've already transitioned modes to reading from the full
+		 * sort state, we assume that having read at least
+		 * DEFAULT_MAX_FULL_SORT_GROUP_SIZE tuples means it's likely we're
+		 * processing a large group of tuples all having equal prefix keys
+		 * (but haven't yet found the final tuple in that prefix key group),
+		 * so we need to transition into presorted prefix mode.
 		 */
 		if (nTuples > DEFAULT_MAX_FULL_SORT_GROUP_SIZE &&
 			node->execution_status != INCSORT_READFULLSORT)
 		{
 			/*
 			 * The group pivot we have stored has already been put into
-			 * the tuplesort; we don't want to carry it over.
+			 * the tuplesort; we don't want to carry it over. Since we
+			 * haven't yet found the end of the prefix key group, it might
+			 * seem like we should keep this, but we don't actually know
+			 * how many prefix key groups might be represented in the full
+			 * sort state, so we'll let the mode transition function manage
+			 * this state for us.
 			 */
 			ExecClearTuple(node->group_pivot);
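The batching policy in the loop above can be sketched standalone. A runnable toy with invented names and a plain int prefix key (the real code loads a tuplesort, tracks bounds, and may instead switch into presorted prefix mode once a group exceeds DEFAULT_MAX_FULL_SORT_GROUP_SIZE):

#include <stdio.h>

#define MIN_GROUP_SIZE 3

int
main(void)
{
	/* prefix-key values of tuples arriving from the outer node */
	int		stream[] = {1, 1, 2, 2, 2, 2, 3};
	int		n = sizeof(stream) / sizeof(stream[0]);
	int		nTuples = 0;
	int		pivot = 0;

	for (int i = 0; i < n; i++)
	{
		if (nTuples < MIN_GROUP_SIZE)
		{
			/* below the floor: load without any group checks */
			nTuples++;
			if (nTuples == MIN_GROUP_SIZE)
				pivot = stream[i];	/* remember the pivot's prefix key */
		}
		else if (stream[i] == pivot)
			nTuples++;			/* same prefix-key group: keep loading */
		else
		{
			/* prefix key changed: sort this batch, carry the tuple over */
			printf("sort and emit a batch of %d tuples\n", nTuples);
			nTuples = 1;
			pivot = stream[i];	/* carried-over tuple seeds the next batch */
		}
	}
	if (nTuples > 0)
		printf("sort and emit a final batch of %d tuples\n", nTuples);
	return 0;
}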
@@ -783,7 +834,7 @@ ExecIncrementalSort(PlanState *pstate)
 			 * haven't yet completed fetching the current prefix key group
 			 * because the tuples we've "lost" already sorted "below" the
 			 * retained ones, and we're already contractually guaranteed
-			 * to not need any more than the currentBount tuples.
+			 * to not need any more than the currentBound tuples.
 			 */
 			if (tuplesort_used_bound(node->fullsort_state))
 			{
@@ -798,10 +849,9 @@ ExecIncrementalSort(PlanState *pstate)
 						   nTuples);
 
 				/*
-				 * Track the number of tuples we need to move from the
-				 * fullsort to presorted prefix sort (we might have multiple
-				 * prefix key groups, so we need a way to see if we've
-				 * actually finished).
+				 * We might have multiple prefix key groups in the full sort
+				 * state, so the mode transition function needs to know how
+				 * many tuples it needs to move from the fullsort to presorted
+				 * prefix sort.
 				 */
 				node->n_fullsort_remaining = nTuples;
 
@@ -828,50 +878,62 @@ ExecIncrementalSort(PlanState *pstate)
 	if (node->execution_status == INCSORT_LOADPREFIXSORT)
 	{
 		/*
-		 * Since we only enter this state after determining that all remaining
-		 * tuples in the full sort tuplesort have the same prefix, we've
-		 * already established a current group pivot tuple (but wasn't carried
-		 * over; it's already been put into the prefix sort tuplesort).
+		 * We only enter this state after the mode transition function has
+		 * confirmed all remaining tuples from the full sort state have the same
+		 * prefix and moved those tuples to the prefix sort state. That
+		 * function has also set a group pivot tuple (which doesn't need to be
+		 * carried over; it's already been put into the prefix sort state).
 		 */
 		Assert(!TupIsNull(node->group_pivot));
 
+		/*
+		 * Read tuples from the outer node and load them into the prefix sort
+		 * state until we encounter a tuple whose prefix keys don't match the
+		 * current group_pivot tuple, since we can't guarantee sort stability
+		 * until we have all tuples matching those prefix keys.
+		 */
 		for (;;)
 		{
 			slot = ExecProcNode(outerNode);
 
-			/* Check to see if there are no more tuples to fetch. */
+			/*
+			 * If we've exhausted tuples from the outer node we're done loading
+			 * the prefix sort state.
+			 */
 			if (TupIsNull(slot))
 			{
+				/*
+				 * We need to know later if the outer node has completed to be
+				 * able to distinguish between being done with a batch and
+				 * being done with the whole node.
+				 */
 				node->outerNodeDone = true;
 				break;
 			}
 
+			/*
+			 * If the tuple's prefix keys match our pivot tuple, we're not done
+			 * yet and can load it into the prefix sort state. If not, we don't
+			 * want to sort it as part of the current batch. Instead we use the
+			 * group_pivot slot to carry it over to the next batch (even though
+			 * we won't actually treat it as a group pivot).
+			 */
 			if (isCurrentGroup(node, node->group_pivot, slot))
 			{
-				/*
-				 * Fetch tuples and put them into the presorted prefix
-				 * tuplesort until we find changed prefix keys. Only then can
-				 * we guarantee sort stability of the tuples we've already
-				 * accumulated.
-				 */
 				tuplesort_puttupleslot(node->prefixsort_state, slot);
 				nTuples++;
 			}
 			else
 			{
-				/*
-				 * Since the tuple we fetched isn't part of the current prefix
-				 * key group we can't sort it as part of this sort group.
-				 * Instead we need to carry it over to the next group. We use
-				 * the group_pivot slot as a temp container for that purpose
-				 * even though we won't actually treat it as a group pivot.
-				 */
				ExecCopySlot(node->group_pivot, slot);
 				break;
 			}
 		}
 
-		/* Perform the sort and return the tuples to the inner plan nodes. */
+		/*
+		 * Perform the sort and begin returning the tuples to the parent plan
+		 * node.
+		 */
 		SO1_printf("Sorting presorted prefix tuplesort with >= %ld tuples\n", nTuples);
 		tuplesort_performsort(node->prefixsort_state);
@@ -935,16 +997,14 @@ ExecInitIncrementalSort(IncrementalSort *node, EState *estate, int eflags)
 
 	/*
 	 * Incremental sort can't be used with either EXEC_FLAG_REWIND,
-	 * EXEC_FLAG_BACKWARD or EXEC_FLAG_MARK, because we hold only current
-	 * bucket in tuplesortstate.
+	 * EXEC_FLAG_BACKWARD or EXEC_FLAG_MARK, because we hold only one of many
+	 * sort batches in the current sort state.
 	 */
 	Assert((eflags & (EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)) == 0);
 
-	/*
-	 * create state structure
-	 */
+	/* Initialize state structure. */
 	incrsortstate = makeNode(IncrementalSortState);
 	incrsortstate->ss.ps.plan = (Plan *) node;
 	incrsortstate->ss.ps.state = estate;
@@ -990,7 +1050,7 @@ ExecInitIncrementalSort(IncrementalSort *node, EState *estate, int eflags)
 	 */
 
 	/*
-	 * initialize child nodes
+	 * Initialize child nodes.
 	 *
 	 * We shield the child node from the need to support REWIND, BACKWARD, or
 	 * MARK/RESTORE.
@@ -1006,12 +1066,15 @@ ExecInitIncrementalSort(IncrementalSort *node, EState *estate, int eflags)
 
 	/*
 	 * Initialize return slot and type. No need to initialize projection info
-	 * because this node doesn't do projections.
+	 * because we don't do any projections.
 	 */
 	ExecInitResultTupleSlotTL(&incrsortstate->ss.ps, &TTSOpsMinimalTuple);
 	incrsortstate->ss.ps.ps_ProjInfo = NULL;
 
-	/* make standalone slot to store previous tuple from outer node */
+	/*
+	 * Initialize standalone slots to store a tuple for pivot prefix keys and
+	 * for carrying over a tuple from one batch to the next.
+	 */
 	incrsortstate->group_pivot =
 		MakeSingleTupleTableSlot(ExecGetResultType(outerPlanState(incrsortstate)),
 								 &TTSOpsMinimalTuple);
@@ -1033,13 +1096,11 @@ ExecEndIncrementalSort(IncrementalSortState *node)
 {
 	SO_printf("ExecEndIncrementalSort: shutting down sort node\n");
 
-	/*
-	 * clean out the tuple table
-	 */
+	/* clean out the scan tuple */
 	ExecClearTuple(node->ss.ss_ScanTupleSlot);
 	/* must drop pointer to sort result tuple */
 	ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
-	/* must drop stanalone tuple slot from outer node */
+	/* must drop standalone tuple slots from outer node */
 	ExecDropSingleTupleTableSlot(node->group_pivot);
 	ExecDropSingleTupleTableSlot(node->transfer_tuple);
@@ -1086,6 +1147,8 @@ ExecReScanIncrementalSort(IncrementalSortState *node)
 	node->outerNodeDone = false;
 
 	/*
+	 * XXX: This is suspect.
+	 *
 	 * If subnode is to be rescanned then we forget previous sort results; we
 	 * have to re-read the subplan and re-sort. Also must re-sort if the
 	 * bounded-sort parameters changed or we didn't select randomAccess.
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 8efbb660b9..a59926fa02 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1802,7 +1802,7 @@ cost_full_sort(Cost *startup_cost, Cost *run_cost,
 /*
  * cost_incremental_sort
  *	Determines and returns the cost of sorting a relation incrementally, when
- *	the input path is already sorted by some of the pathkeys.
+ *	the input path is presorted by a prefix of the pathkeys.
  *
  * 'presorted_keys' is the number of leading pathkeys by which the input path
 * is sorted.
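The pathkeys.c change below replaces all-or-nothing matching with a shared-prefix count; a toy model with plain int keys standing in for PathKey lists (the real code uses pathkeys_common_contained_in):

#include <stdio.h>

static int
common_prefix_length(const int *query_keys, int nquery,
					 const int *path_keys, int npath)
{
	int		n = 0;

	while (n < nquery && n < npath && query_keys[n] == path_keys[n])
		n++;
	return n;
}

int
main(void)
{
	int		order_by[] = {1, 2, 3};	/* stands in for ORDER BY a, b, c */
	int		sorted_by[] = {1, 2};	/* input path already sorted by a, b */

	/*
	 * All-or-nothing logic would score this 0; with incremental sort the
	 * two-key prefix is still useful, so the count (2) is returned.
	 */
	printf("%d\n", common_prefix_length(order_by, 3, sorted_by, 2));
	return 0;
}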
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 74799cd8fd..be569f56fd 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -1831,9 +1831,10 @@ right_merge_direction(PlannerInfo *root, PathKey *pathkey)
  * Count the number of pathkeys that are useful for meeting the
  * query's requested output ordering.
  *
- * Unlike merge pathkeys, this is an all-or-nothing affair: it does us
- * no good to order by just the first key(s) of the requested ordering.
- * So the result is always either 0 or list_length(root->query_pathkeys).
+ * Because we have the possibility of incremental sort, a prefix list of
+ * keys is potentially useful for improving the performance of the requested
+ * ordering. Thus we return 0 if no useful keys are found, or the number of
+ * leading keys shared by the list and the requested ordering.
 */
 static int
 pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
@@ -1849,11 +1850,6 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	(void) pathkeys_common_contained_in(root->query_pathkeys, pathkeys,
 										&n_common_pathkeys);
 
-	/*
-	 * Return the number of path keys in common, or 0 if there are none. Any
-	 * leading common pathkeys could be useful for ordering because we can use
-	 * the incremental sort.
-	 */
 	return n_common_pathkeys;
 }
 
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index c194263cf8..423ac25827 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -4922,8 +4922,8 @@ create_distinct_paths(PlannerInfo *root,
 * Build a new upperrel containing Paths for ORDER BY evaluation.
 *
 * All paths in the result must satisfy the ORDER BY ordering.
- * The only new paths we need consider is an explicit full or
- * incremental sort on the cheapest-total existing path.
+ * The only new paths we need consider are an explicit full sort
+ * and incremental sort on the cheapest-total existing path.
 *
 * input_rel: contains the source-data Paths
 * target: the output tlist the result Paths must emit
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 77c15ebd78..99d64a88af 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -804,7 +804,11 @@ tuplesort_begin_common(int workMem, SortCoordinate coordinate,
 }
 
 /*
- * XXX Missing comment.
+ * tuplesort_begin_batch
+ *
+ * Set up, or reset, all state needed for processing a new set of tuples with
+ * this sort state. Called both from tuplesort_begin_common (the first time
+ * sorting with this sort state) and tuplesort_reset (for subsequent usages).
 */
 static void
 tuplesort_begin_batch(Tuplesortstate *state)
@@ -1291,8 +1295,11 @@ tuplesort_set_bound(Tuplesortstate *state, int64 bound)
 		state->sortKeys->abbrev_full_comparator = NULL;
 	}
 }
+
 /*
- * XXX Missing comment.
+ * tuplesort_used_bound
+ *
+ * Allow callers to find out if the sort state was able to use a bound.
 */
 bool
 tuplesort_used_bound(Tuplesortstate *state)
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index e6a9b67675..71ac1417ab 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -2023,6 +2023,10 @@ typedef struct SortState
 	SharedSortInfo *shared_info;	/* one entry per worker */
 } SortState;
 
+/* ----------------
+ *	 Instrumentation information for IncrementalSort
+ * ----------------
+ */
 typedef struct IncrementalSortGroupInfo
 {
 	int64		groupCount;
-- 
2.17.1
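For context on the tuplesort_begin_batch/tuplesort_reset comments above: the reuse pattern they describe can be sketched with a toy sort state in plain C (qsort standing in for the tuplesort machinery; the point is load, sort, read out, then reset, keeping allocations across batches):

#include <stdio.h>
#include <stdlib.h>

/* A tiny reusable "sort state": load a batch, sort it, read it out, reset. */
typedef struct BatchSorter
{
	int		vals[8];
	int		n;
} BatchSorter;

static int
cmp_int(const void *a, const void *b)
{
	return *(const int *) a - *(const int *) b;
}

static void
sorter_reset(BatchSorter *s)
{
	/* drop the contents but keep the (here: fixed) allocation around */
	s->n = 0;
}

int
main(void)
{
	BatchSorter sorter = {{0}, 0};
	int		batches[2][3] = {{3, 1, 2}, {9, 7, 8}};

	for (int b = 0; b < 2; b++)
	{
		for (int i = 0; i < 3; i++)
			sorter.vals[sorter.n++] = batches[b][i];	/* "puttuple" */
		qsort(sorter.vals, sorter.n, sizeof(int), cmp_int);	/* "performsort" */
		for (int i = 0; i < sorter.n; i++)
			printf("%d ", sorter.vals[i]);	/* "gettuple" loop */
		printf("\n");
		sorter_reset(&sorter);	/* analogous to tuplesort_reset */
	}
	return 0;
}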