From e56feda19b2c908e85750e778646717c3d82292b Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Thu, 6 Mar 2025 02:41:58 +0100 Subject: [PATCH v20250307 4/4] Make GIN parallel builds use a single tuplesort This reduces the size requirement of scratch space and reduces the cycles we have to spend on passing data around. As another benefit, it reduces the code size and complexity of GIN builds. --- src/backend/access/gin/gininsert.c | 175 +---------------------------- 1 file changed, 1 insertion(+), 174 deletions(-) diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index 80cabae99b1..f3a7f375fb2 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -164,14 +164,6 @@ typedef struct * build callback etc. */ Tuplesortstate *bs_sortstate; - - /* - * The sortstate used only within a single worker for the first merge pass - * happenning there. In principle it doesn't need to be part of the build - * state and we could pass it around directly, but it's more convenient - * this way. And it's part of the build state, after all. - */ - Tuplesortstate *bs_worker_sort; } GinBuildState; @@ -508,7 +500,7 @@ ginFlushBuildState(GinBuildState *buildstate, Relation index) key, attr->attlen, attr->attbyval, list, nlist); - tuplesort_putgintuple(buildstate->bs_worker_sort, tup); + tuplesort_putgintuple(buildstate->bs_sortstate, tup); pfree(tup); } @@ -2034,158 +2026,6 @@ _gin_leader_participate_as_worker(GinBuildState *buildstate, Relation heap, Rela sortmem, true); } -/* - * _gin_process_worker_data - * First phase of the key merging, happening in the worker. - * - * Depending on the number of distinct keys, the TID lists produced by the - * callback may be very short (due to frequent evictions in the callback). - * But combining many tiny lists is expensive, so we try to do as much as - * possible in the workers and only then pass the results to the leader. 
- * - * We read the tuples sorted by the key, and merge them into larger lists. - * At the moment there's no memory limit, so this will just produce one - * huge (sorted) list per key in each worker. Which means the leader will - * do a very limited number of mergesorts, which is good. - */ -static void -_gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort, - bool progress) -{ - GinTuple *tup; - Size tuplen; - - GinBuffer *buffer; - - /* - * Initialize buffer to combine entries for the same key. - * - * The workers are limited to the same amount of memory as during the sort - * in ginBuildCallbackParallel. But this probably should be the 32MB used - * during planning, just like there. - */ - buffer = GinBufferInit(state->ginstate.index); - - /* sort the raw per-worker data */ - if (progress) - pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE, - PROGRESS_GIN_PHASE_PERFORMSORT_1); - - tuplesort_performsort(state->bs_worker_sort); - - /* reset the number of GIN tuples produced by this worker */ - state->bs_numtuples = 0; - - if (progress) - pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE, - PROGRESS_GIN_PHASE_MERGE_1); - - /* - * Read the GIN tuples from the shared tuplesort, sorted by the key, and - * merge them into larger chunks for the leader to combine. - */ - while ((tup = tuplesort_getgintuple(worker_sort, &tuplen, true)) != NULL) - { - - CHECK_FOR_INTERRUPTS(); - - /* - * If the buffer can accept the new GIN tuple, just store it there and - * we're done. If it's a different key (or maybe too much data) flush - * the current contents into the index first. - */ - if (!GinBufferCanAddKey(buffer, tup)) - { - GinTuple *ntup; - - /* - * Buffer is not empty and it's storing a different key - flush - * the data into the insert, and start a new entry for current - * GinTuple. 
- */ - AssertCheckItemPointers(buffer); - - ntup = _gin_build_tuple(buffer->attnum, buffer->category, - buffer->key, buffer->typlen, buffer->typbyval, - buffer->items, buffer->nitems); - - tuplesort_putgintuple(state->bs_sortstate, ntup); - state->bs_numtuples++; - - pfree(ntup); - - /* discard the existing data */ - GinBufferReset(buffer); - } - - if (buffer->cached) - GinBufferUnpackCached(buffer, tup->nitems); - - /* - * We're about to add a GIN tuple to the buffer - check the memory - * limit first, and maybe write out some of the data into the index - * first, if needed (and possible). We only flush the part of the TID - * list that we know won't change, and only if there's enough data for - * compression to work well. - */ - if (GinBufferShouldTrim(buffer, tup)) - { - GinTuple *ntup; - - Assert(buffer->nfrozen > 0); - - /* - * Buffer is not empty and it's storing a different key - flush - * the data into the insert, and start a new entry for current - * GinTuple. - */ - AssertCheckItemPointers(buffer); - - ntup = _gin_build_tuple(buffer->attnum, buffer->category, - buffer->key, buffer->typlen, buffer->typbyval, - buffer->items, buffer->nfrozen); - - tuplesort_putgintuple(state->bs_sortstate, ntup); - - pfree(ntup); - - /* truncate the data we've just discarded */ - GinBufferTrim(buffer); - } - - /* - * Remember data for the current tuple (either remember the new key, - * or append if to the existing data). 
- */ - GinBufferStoreOrMergeTuple(buffer, tup); - } - - /* flush data remaining in the buffer (for the last key) */ - if (!GinBufferIsEmpty(buffer)) - { - GinTuple *ntup; - - AssertCheckItemPointers(buffer); - - ntup = _gin_build_tuple(buffer->attnum, buffer->category, - buffer->key, buffer->typlen, buffer->typbyval, - buffer->items, buffer->nitems); - - tuplesort_putgintuple(state->bs_sortstate, ntup); - state->bs_numtuples++; - - pfree(ntup); - - /* discard the existing data */ - GinBufferReset(buffer); - } - - /* relase all the memory */ - GinBufferFree(buffer); - - tuplesort_end(worker_sort); -} - /* * Perform a worker's portion of a parallel GIN index build sort. * @@ -2252,12 +2092,6 @@ _gin_parallel_scan_and_build(GinBuildState *state, coordinate, TUPLESORT_NONE); - /* Local per-worker sort of raw-data */ - state->bs_worker_sort = tuplesort_begin_index_gin(heap, index, - state->work_mem, - NULL, - TUPLESORT_NONE); - /* Join parallel scan */ indexInfo = BuildIndexInfo(index); indexInfo->ii_Concurrent = ginshared->isconcurrent; @@ -2271,13 +2105,6 @@ _gin_parallel_scan_and_build(GinBuildState *state, /* write remaining accumulated entries */ ginFlushBuildState(state, index); - /* - * Do the first phase of in-worker processing - sort the data produced by - * the callback, and combine them into much larger chunks and place that - * into the shared tuplestore for leader to process. - */ - _gin_process_worker_data(state, state->bs_worker_sort, progress); - /* sort the GIN tuples built by this worker */ tuplesort_performsort(state->bs_sortstate); -- 2.45.2