From 04dc8612e9863f8a03833017e463310e2b43fb42 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Thu, 2 May 2024 15:21:43 +0200 Subject: [PATCH v20240712 05/10] Collect and print compression stats Allows evaluating the benefits of compressing the TID lists. --- src/backend/access/gin/gininsert.c | 42 +++++++++++++++++++++++------- src/include/access/gin.h | 2 ++ 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index 59e35fd1e0..7a2d377d94 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -191,7 +191,8 @@ static void _gin_parallel_scan_and_build(GinBuildState *buildstate, static ItemPointer _gin_parse_tuple_items(GinTuple *a); static Datum _gin_parse_tuple_key(GinTuple *a); -static GinTuple *_gin_build_tuple(OffsetNumber attrnum, unsigned char category, +static GinTuple *_gin_build_tuple(GinBuildState *state, + OffsetNumber attrnum, unsigned char category, Datum key, int16 typlen, bool typbyval, ItemPointerData *items, uint32 nitems, Size *len); @@ -554,7 +555,7 @@ ginBuildCallbackParallel(Relation index, ItemPointer tid, Datum *values, /* there could be many entries, so be willing to abort here */ CHECK_FOR_INTERRUPTS(); - tup = _gin_build_tuple(attnum, category, + tup = _gin_build_tuple(buildstate, attnum, category, key, attr->attlen, attr->attbyval, list, nlist, &tuplen); @@ -1199,9 +1200,9 @@ AssertCheckGinBuffer(GinBuffer *buffer) /* * we don't know if the TID array is expected to be sorted or not * - * XXX maybe we can pass that to AssertCheckGinBuffer() call? - * XXX actually with the mergesort in GinBufferStoreTuple, we - * should not need 'false' here. See AssertCheckItemPointers. + * XXX maybe we can pass that to AssertCheckGinBuffer() call? XXX actually + * with the mergesort in GinBufferStoreTuple, we should not need 'false' + * here. See AssertCheckItemPointers. */ AssertCheckItemPointers(buffer, false); #endif @@ -1619,6 +1620,15 @@ _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort) /* sort the raw per-worker data */ tuplesort_performsort(state->bs_worker_sort); + /* print some basic info */ + elog(LOG, "_gin_parallel_scan_and_build raw %zu compressed %zu ratio %.2f%%", + state->buildStats.sizeRaw, state->buildStats.sizeCompressed, + (100.0 * state->buildStats.sizeCompressed) / state->buildStats.sizeRaw); + + /* reset before the second phase */ + state->buildStats.sizeCompressed = 0; + state->buildStats.sizeRaw = 0; + /* * Read the GIN tuples from the shared tuplesort, sorted by the key, and * merge them into larger chunks for the leader to combine. @@ -1645,7 +1655,7 @@ _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort) */ AssertCheckItemPointers(buffer, true); - ntup = _gin_build_tuple(buffer->attnum, buffer->category, + ntup = _gin_build_tuple(state, buffer->attnum, buffer->category, buffer->key, buffer->typlen, buffer->typbyval, buffer->items, buffer->nitems, &ntuplen); @@ -1672,7 +1682,7 @@ _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort) AssertCheckItemPointers(buffer, true); - ntup = _gin_build_tuple(buffer->attnum, buffer->category, + ntup = _gin_build_tuple(state, buffer->attnum, buffer->category, buffer->key, buffer->typlen, buffer->typbyval, buffer->items, buffer->nitems, &ntuplen); @@ -1687,6 +1697,11 @@ _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort) /* relase all the memory */ GinBufferFree(buffer); + /* print some basic info */ + elog(LOG, "_gin_process_worker_data raw %zu compressed %zu ratio %.2f%%", + state->buildStats.sizeRaw, state->buildStats.sizeCompressed, + (100.0 * state->buildStats.sizeCompressed) / state->buildStats.sizeRaw); + tuplesort_end(worker_sort); } @@ -1759,7 +1774,7 @@ _gin_parallel_scan_and_build(GinBuildState *state, /* there could be many entries, so be willing to abort here */ CHECK_FOR_INTERRUPTS(); - tup = _gin_build_tuple(attnum, category, + tup = _gin_build_tuple(state, attnum, category, key, attr->attlen, attr->attbyval, list, nlist, &len); @@ -1853,6 +1868,7 @@ _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc) /* initialize the GIN build state */ initGinState(&buildstate.ginstate, indexRel); buildstate.indtuples = 0; + /* XXX Shouldn't this initialize the other fields too, like ginbuild()? */ memset(&buildstate.buildStats, 0, sizeof(GinStatsData)); /* @@ -1930,7 +1946,8 @@ typedef struct * of that into the GIN tuple. */ static GinTuple * -_gin_build_tuple(OffsetNumber attrnum, unsigned char category, +_gin_build_tuple(GinBuildState *state, + OffsetNumber attrnum, unsigned char category, Datum key, int16 typlen, bool typbyval, ItemPointerData *items, uint32 nitems, Size *len) @@ -2064,6 +2081,13 @@ _gin_build_tuple(OffsetNumber attrnum, unsigned char category, pfree(seginfo); } + /* how large would the tuple be without compression? */ + state->buildStats.sizeRaw += MAXALIGN(offsetof(GinTuple, data) + keylen) + + nitems * sizeof(ItemPointerData); + + /* compressed size */ + state->buildStats.sizeCompressed += tuplen; + return tuple; } diff --git a/src/include/access/gin.h b/src/include/access/gin.h index be76d8446f..2b6633d068 100644 --- a/src/include/access/gin.h +++ b/src/include/access/gin.h @@ -49,6 +49,8 @@ typedef struct GinStatsData BlockNumber nDataPages; int64 nEntries; int32 ginVersion; + Size sizeRaw; + Size sizeCompressed; } GinStatsData; /* -- 2.39.2