diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index 1452e8c..8ab2e9d 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -267,12 +267,14 @@ struct Tuplesortstate * and FINALMERGE, the tuples are organized in "heap" order per Algorithm * H. (Note that memtupcount only counts the tuples that are part of the * heap --- during merge passes, memtuples[] entries beyond tapeRange are - * never in the heap and are used to hold pre-read tuples.) In state - * SORTEDONTAPE, the array is not used. + * never in the heap and are used to hold pre-read tuples; and while + * building runs, we temporarily stash tuples destined for the next run + * at the end of the array.) In state SORTEDONTAPE, the array is not used. */ SortTuple *memtuples; /* array of SortTuple structs */ int memtupcount; /* number of tuples currently present */ int memtupsize; /* allocated length of memtuples array */ + int memtupnextrun; /* tuples saved at end for next run */ /* * While building initial runs, this is the current output run number @@ -1169,13 +1171,12 @@ puttuple_common(Tuplesortstate *state, SortTuple *tuple) case TSS_BUILDRUNS: /* - * Insert the tuple into the heap, with run number currentRun if - * it can go into the current run, else run number currentRun+1. - * The tuple can go into the current run if it is >= the first - * not-yet-output tuple. (Actually, it could go into the current - * run if it is >= the most recently output tuple ... but that - * would require keeping around the tuple we last output, and it's - * simplest to let writetup free each tuple as soon as it's + * Insert the tuple into the heap. The tuple can go into the + * current run if it is >= the first not-yet-output tuple; if not, + * we save it for the next run. (Actually, it could go into the + * current run if it is >= the most recently output tuple ... but + * that would require keeping around the tuple we last output, and + * it's simplest to let writetup free each tuple as soon as it's * written.) * * Note there will always be at least one tuple in the heap at @@ -1183,9 +1184,12 @@ puttuple_common(Tuplesortstate *state, SortTuple *tuple) */ Assert(state->memtupcount > 0); if (COMPARETUP(state, tuple, &state->memtuples[0]) >= 0) - tuplesort_heap_insert(state, tuple, state->currentRun, true); + tuplesort_heap_insert(state, tuple, state->currentRun, false); else - tuplesort_heap_insert(state, tuple, state->currentRun + 1, true); + { + state->memtuples[state->memtupsize - ++state->memtupnextrun] = + *tuple; + } /* * If we are over the memory limit, dump tuples till we're under. @@ -2173,7 +2177,7 @@ dumptuples(Tuplesortstate *state, bool alltuples) { while (alltuples || (LACKMEM(state) && state->memtupcount > 1) || - state->memtupcount >= state->memtupsize) + state->memtupcount + state->memtupnextrun >= state->memtupsize) { /* * Dump the heap's frontmost entry, and sift up to remove it from the @@ -2185,12 +2189,13 @@ dumptuples(Tuplesortstate *state, bool alltuples) tuplesort_heap_siftup(state, true); /* - * If the heap is empty *or* top run number has changed, we've - * finished the current run. + * If the heap is empty, we might be completely done reading the input, + * or we might just have finished the current run. */ - if (state->memtupcount == 0 || - state->currentRun != state->memtuples[0].tupindex) + if (state->memtupcount == 0) { + int index; + markrunend(state, state->tp_tapenum[state->destTape]); state->currentRun++; state->tp_runs[state->destTape]++; @@ -2207,10 +2212,26 @@ dumptuples(Tuplesortstate *state, bool alltuples) /* * Done if heap is empty, else prepare for new run. */ - if (state->memtupcount == 0) + if (state->memtupnextrun == 0) break; Assert(state->currentRun == state->memtuples[0].tupindex); selectnewtape(state); + + /* + * Build a heap out of the tuples saved for the next run. We + * must insert these in ascending index order; otherwise, the + * growing heap might clobber an entry that hasn't been added + * yet. + * + * XXX. We pass false to tuplesort_heap_insert() here since the + * entire heap has the same run number at this point, but it + * would be better to get rid of the tupIndex stuff altogether. + */ + index = state->memtupsize - state->memtupnextrun; + while (index < state->memtupsize) + tuplesort_heap_insert(state, &state->memtuples[index++], + state->currentRun, false); + state->memtupnextrun = 0; } } }