From b4e51dde2670dac744371c3a5b37177a2c307a59 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Fri, 3 May 2024 21:15:01 +1200 Subject: [PATCH v3 3/3] Optimize memory management and increase performance of Materialize Here we make tuplestore.c use a generation.c memory context rather than allocating tuples into the ExecutorState memory context. Doing that could cause that context to become bloated when pfree'd chunks are not reused by future tuples. This speeds up users of tuplestore.c such as the Materialize, WindowAgg and CTE Scan executor nodes. Some benchmarks have shown up to a 22% performance increase in a query containing a Materialize node, but much higher gains are possible if the memory reduction prevents spilling to disk. This is especially true for WindowAgg nodes where improvements of several thousand times are possible. A generation.c memory context is much better suited for this job as it works well with FIFO palloc/pfree patterns and consumes less space as there is no rounding allocations up to the next power-of-2 value as there is with the aset.c context used for ExecutorState. This also allows us to more efficiently cleanup memory used by the tuplestore as we can reset the generation context rather than looping over all stored tuples and pfree'ing them. Also, remove a badly placed USEMEM call in readtup_heap(). The tuple wasn't being allocated in the Tuplestorestate's context, so no need to adjust the memory consumed by the tuplestore there. Discussion: https://postgr.es/m/CAApHDvp5Py9g4Rjq7_inL3-MCK1Co2CRt_YWFwTU2zfQix0p4A%40mail.gmail.com --- src/backend/utils/sort/tuplestore.c | 52 ++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c index 24bb49ca87..551f0f0a19 100644 --- a/src/backend/utils/sort/tuplestore.c +++ b/src/backend/utils/sort/tuplestore.c @@ -266,7 +266,11 @@ tuplestore_begin_common(int eflags, bool interXact, int maxKBytes) state->availMem = state->allowedMem; state->maxSpace = 0; state->myfile = NULL; - state->context = CurrentMemoryContext; + + /* palloc/pfree uses FIFO pattern, which is ideal for generation.c */ + state->context = GenerationContextCreate(CurrentMemoryContext, + "tuplestore tuples", + ALLOCSET_DEFAULT_SIZES); state->resowner = CurrentResourceOwner; state->memtupdeleted = 0; @@ -429,14 +433,38 @@ tuplestore_clear(Tuplestorestate *state) if (state->myfile) BufFileClose(state->myfile); state->myfile = NULL; - if (state->memtuples) + +#ifdef USE_ASSERT_CHECKING { + int64 availMem = state->availMem; + + /* + * Below, we reset the memory context for storing tuples. To save + * from having to always call GetMemoryChunkSpace() on all stored + * tuples, we adjust the availMem to forget all the tuples and just + * recall USEMEM for the space used by the memtuples array. Here we + * just Assert that's correct and the memory tracking hasn't gone + * wrong anywhere. + */ for (i = state->memtupdeleted; i < state->memtupcount; i++) - { - FREEMEM(state, GetMemoryChunkSpace(state->memtuples[i])); - pfree(state->memtuples[i]); - } + availMem += GetMemoryChunkSpace(state->memtuples[i]); + + availMem += GetMemoryChunkSpace(state->memtuples); + + Assert(availMem == state->allowedMem); } +#endif + + /* clear the memory consumed by the memory tuples */ + MemoryContextReset(state->context); + + /* + * Zero the used memory and re-consume the space for the memtuples array. + * This saves having to FREEMEM for each stored tuple. + */ + state->availMem = state->allowedMem; + USEMEM(state, GetMemoryChunkSpace(state->memtuples)); + state->status = TSS_INMEM; state->truncated = false; state->memtupdeleted = 0; @@ -458,16 +486,11 @@ tuplestore_clear(Tuplestorestate *state) void tuplestore_end(Tuplestorestate *state) { - int i; - if (state->myfile) BufFileClose(state->myfile); - if (state->memtuples) - { - for (i = state->memtupdeleted; i < state->memtupcount; i++) - pfree(state->memtuples[i]); - pfree(state->memtuples); - } + + MemoryContextDelete(state->context); + pfree(state->memtuples); pfree(state->readptrs); pfree(state); } @@ -1578,7 +1601,6 @@ readtup_heap(Tuplestorestate *state, unsigned int len) MinimalTuple tuple = (MinimalTuple) palloc(tuplen); char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET; - USEMEM(state, GetMemoryChunkSpace(tuple)); /* read in the tuple proper */ tuple->t_len = tuplen; BufFileReadExact(state->myfile, tupbody, tupbodylen); -- 2.34.1