From 1b6cb18b11f63da1acc53b3c85b0b8e38de979d1 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Thu, 2 May 2024 15:21:43 +0200 Subject: [PATCH v20240619 09/11] Collect and print compression stats Allows evaluating the benefits of compressing the TID lists. --- src/backend/access/gin/gininsert.c | 36 +++++++++++++++++++++++++----- src/include/access/gin.h | 2 ++ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index 9b640bfe5f6..47007aa63b4 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -189,7 +189,8 @@ static void _gin_parallel_scan_and_build(GinBuildState *buildstate, static ItemPointer _gin_parse_tuple_items(GinTuple *a); static Datum _gin_parse_tuple_key(GinTuple *a); -static GinTuple *_gin_build_tuple(OffsetNumber attrnum, unsigned char category, +static GinTuple *_gin_build_tuple(GinBuildState *state, + OffsetNumber attrnum, unsigned char category, Datum key, int16 typlen, bool typbyval, ItemPointerData *items, uint32 nitems, Size *len); @@ -538,7 +539,7 @@ ginBuildCallbackParallel(Relation index, ItemPointer tid, Datum *values, /* there could be many entries, so be willing to abort here */ CHECK_FOR_INTERRUPTS(); - tup = _gin_build_tuple(attnum, category, + tup = _gin_build_tuple(buildstate, attnum, category, key, attr->attlen, attr->attbyval, list, nlist, &tuplen); @@ -1530,6 +1531,15 @@ _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort) /* sort the raw per-worker data */ tuplesort_performsort(state->bs_worker_sort); + /* print some basic info */ + elog(LOG, "_gin_parallel_scan_and_build raw %zu compressed %zu ratio %.2f%%", + state->buildStats.sizeRaw, state->buildStats.sizeCompressed, + (100.0 * state->buildStats.sizeCompressed) / state->buildStats.sizeRaw); + + /* reset before the second phase */ + state->buildStats.sizeCompressed = 0; + state->buildStats.sizeRaw = 0; + /* * Read the GIN tuples from 
the shared tuplesort, sorted by the key, and * merge them into larger chunks for the leader to combine. @@ -1556,7 +1566,7 @@ _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort) */ AssertCheckItemPointers(buffer, true); - ntup = _gin_build_tuple(buffer->attnum, buffer->category, + ntup = _gin_build_tuple(state, buffer->attnum, buffer->category, buffer->key, buffer->typlen, buffer->typbyval, buffer->items, buffer->nitems, &ntuplen); @@ -1583,7 +1593,7 @@ _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort) AssertCheckItemPointers(buffer, true); - ntup = _gin_build_tuple(buffer->attnum, buffer->category, + ntup = _gin_build_tuple(state, buffer->attnum, buffer->category, buffer->key, buffer->typlen, buffer->typbyval, buffer->items, buffer->nitems, &ntuplen); @@ -1598,6 +1608,11 @@ _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort) /* relase all the memory */ GinBufferFree(buffer); + /* print some basic info */ + elog(LOG, "_gin_process_worker_data raw %zu compressed %zu ratio %.2f%%", + state->buildStats.sizeRaw, state->buildStats.sizeCompressed, + (100.0 * state->buildStats.sizeCompressed) / state->buildStats.sizeRaw); + tuplesort_end(worker_sort); } @@ -1669,7 +1684,7 @@ _gin_parallel_scan_and_build(GinBuildState *state, /* there could be many entries, so be willing to abort here */ CHECK_FOR_INTERRUPTS(); - tup = _gin_build_tuple(attnum, category, + tup = _gin_build_tuple(state, attnum, category, key, attr->attlen, attr->attbyval, list, nlist, &len); @@ -1763,6 +1778,7 @@ _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc) /* initialize the GIN build state */ initGinState(&buildstate.ginstate, indexRel); buildstate.indtuples = 0; + /* XXX shouldn't this initialize the other fiedls, like ginbuild()? */ memset(&buildstate.buildStats, 0, sizeof(GinStatsData)); /* @@ -1840,7 +1856,8 @@ typedef struct * of that into the GIN tuple. 
*/ static GinTuple * -_gin_build_tuple(OffsetNumber attrnum, unsigned char category, +_gin_build_tuple(GinBuildState *state, + OffsetNumber attrnum, unsigned char category, Datum key, int16 typlen, bool typbyval, ItemPointerData *items, uint32 nitems, Size *len) @@ -1971,6 +1988,13 @@ _gin_build_tuple(OffsetNumber attrnum, unsigned char category, pfree(seginfo); } + /* how large would the tuple be without compression? */ + state->buildStats.sizeRaw += MAXALIGN(offsetof(GinTuple, data) + keylen) + + nitems * sizeof(ItemPointerData); + + /* compressed size */ + state->buildStats.sizeCompressed += tuplen; + return tuple; } diff --git a/src/include/access/gin.h b/src/include/access/gin.h index be76d8446f4..2b6633d068a 100644 --- a/src/include/access/gin.h +++ b/src/include/access/gin.h @@ -49,6 +49,8 @@ typedef struct GinStatsData BlockNumber nDataPages; int64 nEntries; int32 ginVersion; + Size sizeRaw; + Size sizeCompressed; } GinStatsData; /* -- 2.45.2