From ba74bb6adbd52a960ed2d805591da378643c6639 Mon Sep 17 00:00:00 2001
From: Tomas Vondra
Date: Mon, 24 Feb 2025 23:14:17 +0100
Subject: [PATCH v20250225 2/7] cleanup

---
 src/backend/access/gin/gininsert.c          | 40 +++++-----------------
 src/backend/utils/sort/tuplesortvariants.c  |  2 --
 src/include/access/gin_tuple.h              |  8 +----
 3 files changed, 10 insertions(+), 40 deletions(-)

diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index a23b457bba3..7c2f46b9541 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -145,7 +145,7 @@ typedef struct
 	MemoryContext funcCtx;
 	BuildAccumulator accum;
 	ItemPointerData tid;
-	int			work_mem;
+	int			work_mem;	/* FIXME likely duplicate with indtuples */
 
 	double		bs_numtuples;
@@ -566,16 +566,8 @@ ginBuildCallbackParallel(Relation index, ItemPointer tid, Datum *values,
 
 	/*
 	 * If we've maxed out our available memory, dump everything to the
-	 * tuplesort.
-	 *
-	 * XXX It might seem this should set the memory limit to 32MB, same as
-	 * what plan_create_index_workers() uses to calculate the number of
-	 * parallel workers, but that's the limit for tuplesort. So it seems
-	 * better to keep using work_mem here.
-	 *
-	 * XXX But maybe we should calculate this as a per-worker fraction of
-	 * maintenance_work_mem. It's weird to use work_mem here, in a clearly
-	 * maintenance command.
+	 * tuplesort. We use half the per-worker fraction of maintenance_work_mem,
+	 * the other half is used for the tuplesort.
 	 */
 	if (buildstate->accum.allocatedMemory >= buildstate->work_mem * (Size) 1024)
 		ginFlushBuildState(buildstate, index);
@@ -607,11 +599,7 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 	buildstate.indtuples = 0;
 	memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
 
-	/*
-	 * Initialize all the fields, not to trip valgrind.
-	 *
-	 * XXX Maybe there should be an "init" function for build state?
-	 */
+	/* Initialize fields for parallel build too. */
 	buildstate.bs_numtuples = 0;
 	buildstate.bs_reltuples = 0;
 	buildstate.bs_leader = NULL;
@@ -1195,9 +1183,8 @@ AssertCheckItemPointers(GinBuffer *buffer)
 /*
  * GinBuffer checks
  *
- * XXX Maybe it would be better to have AssertCheckGinBuffer with flags, instead
- * of having to call AssertCheckItemPointers in some places, if we require the
- * items to not be empty?
+ * Make sure the nitems/items fields are consistent (either the array is empty
+ * or not empty, the fields need to agree). If there are items, check ordering.
  */
 static void
 AssertCheckGinBuffer(GinBuffer *buffer)
@@ -1415,11 +1402,6 @@ GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
 /*
  * GinBufferReset
  *		Reset the buffer into a state as if it contains no data.
- *
- * XXX Should we do something if the array of TIDs gets too large? It may
- * grow too much, and we'll not free it until the worker finishes building.
- * But it's better to not let the array grow arbitrarily large, and enforce
- * work_mem as memory limit by flushing the buffer into the tuplestore.
  */
 static void
 GinBufferReset(GinBuffer *buffer)
@@ -1468,11 +1450,6 @@ GinBufferFree(GinBuffer *buffer)
  *		Check if a given GIN tuple can be added to the current buffer.
  *
  * Returns true if the buffer is either empty or for the same index key.
- *
- * XXX This could / should also enforce a memory limit by checking the size of
- * the TID array, and returning false if it's too large (more thant work_mem,
- * for example). But in the leader we need to be careful not to force flushing
- * data too early, which might break the monotonicity of TID list.
  */
 static bool
 GinBufferCanAddKey(GinBuffer *buffer, GinTuple *tup)
@@ -1987,8 +1964,9 @@ _gin_build_tuple(OffsetNumber attrnum, unsigned char category,
 
 	/*
 	 * Allocate space for the whole GIN tuple.
 	 *
-	 * XXX palloc0 so that valgrind does not complain about uninitialized
-	 * bytes in writetup_index_gin, likely because of padding
+	 * The palloc0 is needed - writetup_index_gin will write the whole tuple
+	 * to disk, so we need to make sure the padding bytes are defined
+	 * (otherwise valgrind would report this).
 	 */
 	tuple = palloc0(tuplen);
diff --git a/src/backend/utils/sort/tuplesortvariants.c b/src/backend/utils/sort/tuplesortvariants.c
index 4d3114076b3..eb8601e2257 100644
--- a/src/backend/utils/sort/tuplesortvariants.c
+++ b/src/backend/utils/sort/tuplesortvariants.c
@@ -632,8 +632,6 @@ tuplesort_begin_index_gin(Relation heapRel,
 		/*
 		 * Look for a ordering for the index key data type, and then the sort
 		 * support function.
-		 *
-		 * XXX does this use the right opckeytype/opcintype for GIN?
 		 */
 		typentry = lookup_type_cache(att->atttypid, TYPECACHE_LT_OPR);
 		PrepareSortSupportFromOrderingOp(typentry->lt_opr, sortKey);
diff --git a/src/include/access/gin_tuple.h b/src/include/access/gin_tuple.h
index c8fe1130aa4..ce555031335 100644
--- a/src/include/access/gin_tuple.h
+++ b/src/include/access/gin_tuple.h
@@ -15,13 +15,7 @@
 #include "utils/sortsupport.h"
 
 /*
- * Each worker sees tuples in CTID order, so if we track the first TID and
- * compare that when combining results in the worker, we would not need to
- * do an expensive sort in workers (the mergesort is already smart about
- * detecting this and just concatenating the lists). We'd still need the
- * full mergesort in the leader, but that's much cheaper.
- *
- * XXX do we still need all the fields now that we use SortSupport?
+ * Data for one key in a GIN index.
  */
 typedef struct GinTuple
 {
-- 
2.48.1
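Aside for reviewers: the memory-limit check touched in ginBuildCallbackParallel() is a plain accumulate-then-flush pattern, and a minimal standalone sketch of it is below. This is not the patch's code: ToyBuildState and the toy_* helpers are made-up stand-ins for the real BuildAccumulator / ginFlushBuildState machinery, and only the kilobyte-based budget comparison mirrors the work_mem * 1024 check in the hunk above.

#include <stdio.h>

/*
 * Toy stand-in for the GIN build state (hypothetical names; only the
 * memory-accounting fields matter for this sketch).
 */
typedef struct
{
	size_t		allocatedMemory;	/* bytes currently held by the accumulator */
	int			work_mem;			/* memory budget, in kilobytes */
	long		flushes;			/* how many times we spilled to the sort */
} ToyBuildState;

/* Pretend to accumulate one entry, tracking its memory footprint. */
static void
toy_accumulate(ToyBuildState *state, size_t entry_bytes)
{
	state->allocatedMemory += entry_bytes;
}

/* Pretend to dump the accumulated entries into the tuplesort and reset. */
static void
toy_flush(ToyBuildState *state)
{
	state->flushes++;
	state->allocatedMemory = 0;
}

int
main(void)
{
	ToyBuildState state = {0, 1024, 0};	/* 1MB budget for the demo */

	for (int i = 0; i < 100000; i++)
	{
		toy_accumulate(&state, 64);		/* pretend each entry costs 64 bytes */

		/* Same shape as the check in ginBuildCallbackParallel(). */
		if (state.allocatedMemory >= (size_t) state.work_mem * 1024)
			toy_flush(&state);
	}

	printf("flushed %ld times\n", state.flushes);
	return 0;
}

With a 1MB budget and 64-byte entries the sketch flushes roughly every 16k entries, which is the same behaviour the real callback gets from its per-worker budget: the accumulator never grows past the configured limit before being handed to the tuplesort.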