From 0179fd17ec1ac414510e01b4afd5488eb2c6faa2 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Tue, 5 Mar 2024 15:05:47 +0700 Subject: [PATCH v71 1/6] Add TIDStore, to store sets of TIDs (ItemPointerData) efficiently. TIDStore is a data structure designed to efficiently store large sets of TIDs. For TID storage, it employs a radix tree, where the key is the BlockNumber, and the value is a bitmap representing offset numbers. The TIDStore can be created on a DSA area and used by multiple backend processes simultaneously. There are potential future users such as tidbitmap.c, though it's very likely the interface will need to evolve as we come to understand the needs of different kinds of users. For example, we can support updating the offset bitmap of existing values. Currently, the TIDStore is not used for anything yet, aside from the test code. But an upcoming patch will use it. This includes a unit test module, in src/test/modules/test_tidstore. Co-authored-by: John Naylor Discussion: https://postgr.es/m/CAD21AoAfOZvmfR0j8VmZorZjL7RhTiQdVttNuC4W-Shdc2a-AA%40mail.gmail.com --- doc/src/sgml/monitoring.sgml | 4 + src/backend/access/common/Makefile | 1 + src/backend/access/common/meson.build | 1 + src/backend/access/common/tidstore.c | 461 ++++++++++++++++++ src/backend/storage/lmgr/lwlock.c | 1 + src/include/access/tidstore.h | 48 ++ src/include/storage/lwlock.h | 1 + src/test/modules/Makefile | 1 + src/test/modules/meson.build | 1 + src/test/modules/test_tidstore/Makefile | 23 + .../test_tidstore/expected/test_tidstore.out | 128 +++++ src/test/modules/test_tidstore/meson.build | 33 ++ .../test_tidstore/sql/test_tidstore.sql | 58 +++ .../test_tidstore/test_tidstore--1.0.sql | 35 ++ .../modules/test_tidstore/test_tidstore.c | 245 ++++++++++ .../test_tidstore/test_tidstore.control | 4 + src/tools/pgindent/typedefs.list | 4 + 17 files changed, 1049 insertions(+) create mode 100644 src/backend/access/common/tidstore.c create mode 100644 src/include/access/tidstore.h 
create mode 100644 src/test/modules/test_tidstore/Makefile create mode 100644 src/test/modules/test_tidstore/expected/test_tidstore.out create mode 100644 src/test/modules/test_tidstore/meson.build create mode 100644 src/test/modules/test_tidstore/sql/test_tidstore.sql create mode 100644 src/test/modules/test_tidstore/test_tidstore--1.0.sql create mode 100644 src/test/modules/test_tidstore/test_tidstore.c create mode 100644 src/test/modules/test_tidstore/test_tidstore.control diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 8aca08140e..c8d76906aa 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -1099,6 +1099,10 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser See . + + SharedTidStore + Waiting to access a shared TID store. + Timeout The server process is waiting for a timeout diff --git a/src/backend/access/common/Makefile b/src/backend/access/common/Makefile index b9aff0ccfd..67b8cc6108 100644 --- a/src/backend/access/common/Makefile +++ b/src/backend/access/common/Makefile @@ -27,6 +27,7 @@ OBJS = \ syncscan.o \ toast_compression.o \ toast_internals.o \ + tidstore.o \ tupconvert.o \ tupdesc.o diff --git a/src/backend/access/common/meson.build b/src/backend/access/common/meson.build index 725041a4ce..a02397855e 100644 --- a/src/backend/access/common/meson.build +++ b/src/backend/access/common/meson.build @@ -15,6 +15,7 @@ backend_sources += files( 'syncscan.c', 'toast_compression.c', 'toast_internals.c', + 'tidstore.c', 'tupconvert.c', 'tupdesc.c', ) diff --git a/src/backend/access/common/tidstore.c b/src/backend/access/common/tidstore.c new file mode 100644 index 0000000000..b725b62d4c --- /dev/null +++ b/src/backend/access/common/tidstore.c @@ -0,0 +1,461 @@ +/*------------------------------------------------------------------------- + * + * tidstore.c + * TID (ItemPointerData) storage implementation. + * + * TidStore is an in-memory data structure to store TIDs (ItemPointerData). 
+ * Internally it uses a radix tree as the storage for TIDs. The key is the + * BlockNumber and the value is a bitmap of offsets, BlocktableEntry. + * + * TidStore can be shared among parallel worker processes by passing DSA area + * to TidStoreCreate(). Other backends can attach to the shared TidStore by + * TidStoreAttach(). + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/common/tidstore.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/tidstore.h" +#include "miscadmin.h" +#include "nodes/bitmapset.h" +#include "port/pg_bitutils.h" +#include "storage/lwlock.h" +#include "utils/dsa.h" +#include "utils/memutils.h" + + +#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD) +#define BITNUM(x) ((x) % BITS_PER_BITMAPWORD) + +/* number of active words for a page: */ +#define WORDS_PER_PAGE(n) ((n) / BITS_PER_BITMAPWORD + 1) + +/* + * This is named similarly to PagetableEntry in tidbitmap.c + * because the two have a similar function. 
+ */ +typedef struct BlocktableEntry +{ + uint16 nwords; /* number of bitmapwords stored in words[] */ + bitmapword words[FLEXIBLE_ARRAY_MEMBER]; +} BlocktableEntry; +#define MaxBlocktableEntrySize \ + offsetof(BlocktableEntry, words) + \ + (sizeof(bitmapword) * WORDS_PER_PAGE(MaxOffsetNumber)) + +#define RT_PREFIX local_rt +#define RT_SCOPE static +#define RT_DECLARE +#define RT_DEFINE +#define RT_VALUE_TYPE BlocktableEntry +#define RT_VARLEN_VALUE_SIZE(page) \ + (offsetof(BlocktableEntry, words) + \ + sizeof(bitmapword) * (page)->nwords) +#include "lib/radixtree.h" + +#define RT_PREFIX shared_rt +#define RT_SHMEM +#define RT_SCOPE static +#define RT_DECLARE +#define RT_DEFINE +#define RT_VALUE_TYPE BlocktableEntry +#define RT_VARLEN_VALUE_SIZE(page) \ + (offsetof(BlocktableEntry, words) + \ + sizeof(bitmapword) * (page)->nwords) +#include "lib/radixtree.h" + +/* Per-backend state for a TidStore */ +struct TidStore +{ + /* MemoryContext where the TidStore is allocated */ + MemoryContext context; + + /* MemoryContext that the radix tree uses for its allocations */ + MemoryContext rt_context; + + /* Storage for TIDs. Use either one depending on TidStoreIsShared() */ + union + { + local_rt_radix_tree *local; + shared_rt_radix_tree *shared; + } tree; + + /* DSA area for TidStore if using shared memory, otherwise NULL */ + dsa_area *area; +}; +#define TidStoreIsShared(ts) ((ts)->area != NULL) + +/* Iterator for TidStore */ +struct TidStoreIter +{ + TidStore *ts; + + /* iterator of radix tree. Use either one depending on TidStoreIsShared() */ + union + { + shared_rt_iter *shared; + local_rt_iter *local; + } tree_iter; + + /* output for the caller */ + TidStoreIterResult output; +}; + +static void tidstore_iter_extract_tids(TidStoreIter *iter, uint64 key, + BlocktableEntry *page); + +/* + * Create a TidStore. The TidStore will live in the memory context that is + * CurrentMemoryContext at the time of this call. The TID storage, backed + * by a radix tree, will live in its child memory context, rt_context. 
The + * TidStore will be limited to (approximately) max_bytes total memory + * consumption. If the 'area' is non-NULL, the radix tree is created in the + * DSA area. + * + * The returned object is allocated in backend-local memory. + */ +TidStore * +TidStoreCreate(size_t max_bytes, dsa_area *area) +{ + TidStore *ts; + size_t initBlockSize = ALLOCSET_DEFAULT_INITSIZE; + size_t minContextSize = ALLOCSET_DEFAULT_MINSIZE; + size_t maxBlockSize = ALLOCSET_DEFAULT_MAXSIZE; + + ts = palloc0(sizeof(TidStore)); + ts->context = CurrentMemoryContext; + + /* choose the maxBlockSize to be no larger than 1/16 of max_bytes (in bytes) */ + while (16 * maxBlockSize > max_bytes) + maxBlockSize >>= 1; + + if (maxBlockSize < ALLOCSET_DEFAULT_INITSIZE) + maxBlockSize = ALLOCSET_DEFAULT_INITSIZE; + + /* Create a memory context for the TID storage */ + ts->rt_context = AllocSetContextCreate(CurrentMemoryContext, + "TID storage", + minContextSize, + initBlockSize, + maxBlockSize); + + if (area != NULL) + { + ts->tree.shared = shared_rt_create(ts->rt_context, area, + LWTRANCHE_SHARED_TIDSTORE); + ts->area = area; + } + else + ts->tree.local = local_rt_create(ts->rt_context); + + return ts; +} + +/* + * Attach to the shared TidStore using a handle. The returned object is + * allocated in backend-local memory using the CurrentMemoryContext. + */ +TidStore * +TidStoreAttach(dsa_area *area, dsa_pointer handle) +{ + TidStore *ts; + + Assert(area != NULL); + Assert(DsaPointerIsValid(handle)); + + /* create per-backend state */ + ts = palloc0(sizeof(TidStore)); + + /* Find the shared radix tree */ + ts->tree.shared = shared_rt_attach(area, handle); + ts->area = area; + + return ts; +} + +/* + * Detach from a TidStore. This detaches from radix tree and frees the + * backend-local resources. The radix tree will continue to exist until + * it is either explicitly destroyed, or the area that backs it is returned + * to the operating system. 
+ */ +void +TidStoreDetach(TidStore *ts) +{ + Assert(TidStoreIsShared(ts)); + + shared_rt_detach(ts->tree.shared); + pfree(ts); +} + +/* + * Lock support functions. + * + * We can use the radix tree's lock for shared TidStore as the data we + * need to protect is only the shared radix tree; they do nothing for a local one. + */ +void +TidStoreLockExclusive(TidStore *ts) +{ + if (TidStoreIsShared(ts)) + shared_rt_lock_exclusive(ts->tree.shared); +} + +void +TidStoreLockShare(TidStore *ts) +{ + if (TidStoreIsShared(ts)) + shared_rt_lock_share(ts->tree.shared); +} + +void +TidStoreUnlock(TidStore *ts) +{ + if (TidStoreIsShared(ts)) + shared_rt_unlock(ts->tree.shared); +} + +/* + * Destroy a TidStore, returning all memory. + * + * Note that the caller must be certain that no other backend will attempt to + * access the TidStore before calling this function. Other backends must + * explicitly call TidStoreDetach() to free up backend-local memory associated + * with the TidStore. The backend that calls TidStoreDestroy() must not call + * TidStoreDetach(). + */ +void +TidStoreDestroy(TidStore *ts) +{ + /* Destroy underlying radix tree */ + if (TidStoreIsShared(ts)) + shared_rt_free(ts->tree.shared); + else + local_rt_free(ts->tree.local); + + MemoryContextDelete(ts->rt_context); + + pfree(ts); +} + +/* + * Set the given TIDs on the blkno to TidStore. + * + * NB: the offset numbers in offsets must be sorted in ascending order. 
+ */ +void +TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets, + int num_offsets) +{ + union { char data[MaxBlocktableEntrySize]; BlocktableEntry force_align_entry; } data; /* aligned scratch buffer */ + BlocktableEntry *page = (BlocktableEntry *) data.data; + bitmapword word; + int wordnum; + int next_word_threshold; + int idx = 0; + bool found PG_USED_FOR_ASSERTS_ONLY; + + Assert(num_offsets > 0); + + for (wordnum = 0, next_word_threshold = BITS_PER_BITMAPWORD; + wordnum <= WORDNUM(offsets[num_offsets - 1]); + wordnum++, next_word_threshold += BITS_PER_BITMAPWORD) + { + word = 0; + + while (idx < num_offsets) + { + OffsetNumber off = offsets[idx]; + + /* safety check to ensure we don't overrun bit array bounds */ + if (!OffsetNumberIsValid(off)) + elog(ERROR, "tuple offset out of range: %u", off); + + if (off >= next_word_threshold) + break; + + word |= ((bitmapword) 1 << BITNUM(off)); + idx++; + } + + /* write out offset bitmap for this wordnum */ + page->words[wordnum] = word; + } + + page->nwords = wordnum; + Assert(page->nwords == WORDS_PER_PAGE(offsets[num_offsets - 1])); + + if (TidStoreIsShared(ts)) + found = shared_rt_set(ts->tree.shared, blkno, page); + else + found = local_rt_set(ts->tree.local, blkno, page); + + Assert(!found); +} + +/* Return true if the given TID is present in the TidStore */ +bool +TidStoreIsMember(TidStore *ts, ItemPointer tid) +{ + int wordnum; + int bitnum; + BlocktableEntry *page; + BlockNumber blk = ItemPointerGetBlockNumber(tid); + OffsetNumber off = ItemPointerGetOffsetNumber(tid); + bool ret; + + if (TidStoreIsShared(ts)) + page = shared_rt_find(ts->tree.shared, blk); + else + page = local_rt_find(ts->tree.local, blk); + + /* no entry for the blk */ + if (page == NULL) + return false; + + wordnum = WORDNUM(off); + bitnum = BITNUM(off); + + /* no bitmap for the off */ + if (wordnum >= page->nwords) + return false; + + ret = (page->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0; + + return ret; +} + +/* + * Prepare to iterate through a TidStore. 
Since the radix tree is locked during + * the iteration, TidStoreEndIterate() needs to be called when finished. + * + * The TidStoreIter struct is created in the caller's memory context. + * + * Concurrent updates during the iteration will be blocked when inserting a + * key-value to the radix tree. + */ +TidStoreIter * +TidStoreBeginIterate(TidStore *ts) +{ + TidStoreIter *iter; + + iter = palloc0(sizeof(TidStoreIter)); + iter->ts = ts; + + /* + * We start with an array large enough to contain at least the offsets + * from one completely full bitmap element. + */ + iter->output.max_offset = 2 * BITS_PER_BITMAPWORD; + iter->output.offsets = palloc(sizeof(OffsetNumber) * iter->output.max_offset); + + if (TidStoreIsShared(ts)) + iter->tree_iter.shared = shared_rt_begin_iterate(ts->tree.shared); + else + iter->tree_iter.local = local_rt_begin_iterate(ts->tree.local); + + return iter; +} + + +/* + * Scan the TidStore and return a pointer to TidStoreIterResult that has TIDs + * in one block. We return the block numbers in ascending order and the offset + * numbers in each result are also sorted in ascending order. Returns NULL once the radix tree iterator yields no entry; the result lives inside the iterator. + */ +TidStoreIterResult * +TidStoreIterateNext(TidStoreIter *iter) +{ + uint64 key; + BlocktableEntry *page; + TidStoreIterResult *result = &(iter->output); + + if (TidStoreIsShared(iter->ts)) + page = shared_rt_iterate_next(iter->tree_iter.shared, &key); + else + page = local_rt_iterate_next(iter->tree_iter.local, &key); + + if (page == NULL) + return NULL; + + /* Collect TIDs extracted from the key-value pair */ + tidstore_iter_extract_tids(iter, key, page); + + return result; +} + +/* + * Finish an iteration over TidStore. This needs to be called after finishing + * or when exiting an iteration. 
+ */ +void +TidStoreEndIterate(TidStoreIter *iter) +{ + if (TidStoreIsShared(iter->ts)) + shared_rt_end_iterate(iter->tree_iter.shared); + else + local_rt_end_iterate(iter->tree_iter.local); + + pfree(iter->output.offsets); + pfree(iter); +} + +/* Return the memory usage of TidStore */ +size_t +TidStoreMemoryUsage(TidStore *ts) +{ + if (TidStoreIsShared(ts)) + return shared_rt_memory_usage(ts->tree.shared); + else + return local_rt_memory_usage(ts->tree.local); +} + +dsa_pointer +TidStoreGetHandle(TidStore *ts) +{ + Assert(TidStoreIsShared(ts)); + + return (dsa_pointer) shared_rt_get_handle(ts->tree.shared); +} + +/* Extract TIDs from the given key-value pair */ +static void +tidstore_iter_extract_tids(TidStoreIter *iter, uint64 key, BlocktableEntry *page) +{ + TidStoreIterResult *result = (&iter->output); + int wordnum; + + result->num_offsets = 0; + result->blkno = (BlockNumber) key; + + for (wordnum = 0; wordnum < page->nwords; wordnum++) + { + bitmapword w = page->words[wordnum]; + + /* Make sure there is enough space to add offsets */ + if ((result->num_offsets + BITS_PER_BITMAPWORD) > result->max_offset) + { + result->max_offset *= 2; + result->offsets = repalloc(result->offsets, + sizeof(OffsetNumber) * result->max_offset); + } + + while (w != 0) + { + /* get pos of rightmost bit */ + int bitnum = bmw_rightmost_one_pos(w); + int off = wordnum * BITS_PER_BITMAPWORD + bitnum; + + result->offsets[result->num_offsets++] = off; + + /* unset the rightmost bit */ + w &= w - 1; + } + } +} diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 30f3a09a4c..e9dd5e6f99 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -151,6 +151,7 @@ static const char *const BuiltinTrancheNames[] = { [LWTRANCHE_PER_SESSION_RECORD_TYPMOD] = "PerSessionRecordTypmod", [LWTRANCHE_SHARED_TUPLESTORE] = "SharedTupleStore", [LWTRANCHE_SHARED_TIDBITMAP] = "SharedTidBitmap", + [LWTRANCHE_SHARED_TIDSTORE] = 
"SharedTidStore", [LWTRANCHE_PARALLEL_APPEND] = "ParallelAppend", [LWTRANCHE_PER_XACT_PREDICATE_LIST] = "PerXactPredicateList", [LWTRANCHE_PGSTATS_DSA] = "PgStatsDSA", diff --git a/src/include/access/tidstore.h b/src/include/access/tidstore.h new file mode 100644 index 0000000000..b3c331ea1d --- /dev/null +++ b/src/include/access/tidstore.h @@ -0,0 +1,48 @@ +/*------------------------------------------------------------------------- + * + * tidstore.h + * Tid storage. + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/tidstore.h + * + *------------------------------------------------------------------------- + */ +#ifndef TIDSTORE_H +#define TIDSTORE_H + +#include "storage/itemptr.h" +#include "utils/dsa.h" + +typedef struct TidStore TidStore; +typedef struct TidStoreIter TidStoreIter; + +/* Result struct for TidStoreIterateNext */ +typedef struct TidStoreIterResult +{ + BlockNumber blkno; + int max_offset; + int num_offsets; + OffsetNumber *offsets; +} TidStoreIterResult; + +extern TidStore *TidStoreCreate(size_t max_bytes, dsa_area *dsa); +extern TidStore *TidStoreAttach(dsa_area *dsa, dsa_pointer rt_dp); +extern void TidStoreDetach(TidStore *ts); +extern void TidStoreLockExclusive(TidStore *ts); +extern void TidStoreLockShare(TidStore *ts); +extern void TidStoreUnlock(TidStore *ts); +extern void TidStoreDestroy(TidStore *ts); +extern void TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets, + int num_offsets); +extern bool TidStoreIsMember(TidStore *ts, ItemPointer tid); +extern TidStoreIter *TidStoreBeginIterate(TidStore *ts); +extern TidStoreIterResult *TidStoreIterateNext(TidStoreIter *iter); +extern void TidStoreEndIterate(TidStoreIter *iter); +extern size_t TidStoreMemoryUsage(TidStore *ts); +extern dsa_pointer TidStoreGetHandle(TidStore *ts); + +#endif /* TIDSTORE_H */ diff --git 
a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 10bea8c595..152e3b047e 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -200,6 +200,7 @@ typedef enum BuiltinTrancheIds LWTRANCHE_PER_SESSION_RECORD_TYPMOD, LWTRANCHE_SHARED_TUPLESTORE, LWTRANCHE_SHARED_TIDBITMAP, + LWTRANCHE_SHARED_TIDSTORE, LWTRANCHE_PARALLEL_APPEND, LWTRANCHE_PER_XACT_PREDICATE_LIST, LWTRANCHE_PGSTATS_DSA, diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile index 875a76d6f1..1cbd532156 100644 --- a/src/test/modules/Makefile +++ b/src/test/modules/Makefile @@ -35,6 +35,7 @@ SUBDIRS = \ test_rls_hooks \ test_shm_mq \ test_slru \ + test_tidstore \ unsafe_tests \ worker_spi \ xid_wraparound diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build index f1d18a1b29..7c11fb97f2 100644 --- a/src/test/modules/meson.build +++ b/src/test/modules/meson.build @@ -34,6 +34,7 @@ subdir('test_resowner') subdir('test_rls_hooks') subdir('test_shm_mq') subdir('test_slru') +subdir('test_tidstore') subdir('unsafe_tests') subdir('worker_spi') subdir('xid_wraparound') diff --git a/src/test/modules/test_tidstore/Makefile b/src/test/modules/test_tidstore/Makefile new file mode 100644 index 0000000000..dab107d70c --- /dev/null +++ b/src/test/modules/test_tidstore/Makefile @@ -0,0 +1,23 @@ +# src/test/modules/test_tidstore/Makefile + +MODULE_big = test_tidstore +OBJS = \ + $(WIN32RES) \ + test_tidstore.o +PGFILEDESC = "test_tidstore - test code for src/backend/access/common/tidstore.c" + +EXTENSION = test_tidstore +DATA = test_tidstore--1.0.sql + +REGRESS = test_tidstore + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/test_tidstore +top_builddir = ../../../.. 
+include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/src/test/modules/test_tidstore/expected/test_tidstore.out b/src/test/modules/test_tidstore/expected/test_tidstore.out new file mode 100644 index 0000000000..80698924a1 --- /dev/null +++ b/src/test/modules/test_tidstore/expected/test_tidstore.out @@ -0,0 +1,128 @@ +CREATE EXTENSION test_tidstore; +-- Constant values used in the tests. +\set maxblkno 4294967295 +-- The maximum number of heap tuples (MaxHeapTuplesPerPage) in 8kB block is 291. +-- We use a higher number to test tidstore. +\set maxoffset 512 +-- Support functions. +CREATE FUNCTION make_tid(a bigint, b int2) RETURNS tid +BEGIN ATOMIC +RETURN ('(' || a || ', ' || b || ')')::tid; +END; +-- Lookup test function. Search 1 to (:maxoffset + 5) offset numbers in +-- 4 blocks, and return TIDS if found in the tidstore. +CREATE FUNCTION lookup_test() RETURNS SETOF tid +BEGIN ATOMIC; +WITH blocks (blk) AS ( +VALUES (0), (2), (:maxblkno - 2), (:maxblkno) +) +SELECT t_ctid + FROM + (SELECT array_agg(make_tid(blk, off::int2)) AS tids + FROM blocks, generate_series(1, :maxoffset + 5) off) AS foo, + LATERAL test_lookup_tids(foo.tids) + WHERE found ORDER BY t_ctid; +END; +-- Test a local tdistore. A shared tidstore is created by passing true. +SELECT test_create(false); + test_create +------------- + +(1 row) + +-- Test on empty tidstore. +SELECT * + FROM test_lookup_tids(ARRAY[make_tid(0, 1::int2), + make_tid(:maxblkno, :maxoffset::int2)]::tid[]); + t_ctid | found +------------------+------- + (0,1) | f + (4294967295,512) | f +(2 rows) + +SELECT test_is_full(); + test_is_full +-------------- + f +(1 row) + +-- Add tids in out of order. 
+WITH blocks (blk) AS( +VALUES (0), (1), (:maxblkno - 1), (:maxblkno / 2), (:maxblkno) +), +offsets (off) AS ( +VALUES (1), (2), (:maxoffset / 2), (:maxoffset - 1), (:maxoffset) +) +SELECT test_set_block_offsets(blk, array_agg(offsets.off)::int2[]) + FROM blocks, offsets + GROUP BY blk; + test_set_block_offsets +------------------------ + 2147483647 + 0 + 4294967294 + 1 + 4294967295 +(5 rows) + +-- Lookup test and dump (sorted) tids. +SELECT lookup_test(); + lookup_test +------------------ + (0,1) + (0,2) + (0,256) + (0,511) + (0,512) + (4294967295,1) + (4294967295,2) + (4294967295,256) + (4294967295,511) + (4294967295,512) +(10 rows) + +SELECT test_is_full(); + test_is_full +-------------- + f +(1 row) + +SELECT test_dump_tids(); + test_dump_tids +------------------ + (0,1) + (0,2) + (0,256) + (0,511) + (0,512) + (1,1) + (1,2) + (1,256) + (1,511) + (1,512) + (2147483647,1) + (2147483647,2) + (2147483647,256) + (2147483647,511) + (2147483647,512) + (4294967294,1) + (4294967294,2) + (4294967294,256) + (4294967294,511) + (4294967294,512) + (4294967295,1) + (4294967295,2) + (4294967295,256) + (4294967295,511) + (4294967295,512) +(25 rows) + +-- cleanup +SELECT test_destroy(); + test_destroy +-------------- + +(1 row) + +DROP FUNCTION lookup_test(); +DROP FUNCTION make_tid(a bigint, b int2); diff --git a/src/test/modules/test_tidstore/meson.build b/src/test/modules/test_tidstore/meson.build new file mode 100644 index 0000000000..0ed3ea2ef3 --- /dev/null +++ b/src/test/modules/test_tidstore/meson.build @@ -0,0 +1,33 @@ +# Copyright (c) 2024, PostgreSQL Global Development Group + +test_tidstore_sources = files( + 'test_tidstore.c', +) + +if host_system == 'windows' + test_tidstore_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'test_tidstore', + '--FILEDESC', 'test_tidstore - test code for src/backend/access/common/tidstore.c',]) +endif + +test_tidstore = shared_module('test_tidstore', + test_tidstore_sources, + kwargs: pg_test_mod_args, +) 
+test_install_libs += test_tidstore + +test_install_data += files( + 'test_tidstore.control', + 'test_tidstore--1.0.sql', +) + +tests += { + 'name': 'test_tidstore', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'regress': { + 'sql': [ + 'test_tidstore', + ], + }, +} diff --git a/src/test/modules/test_tidstore/sql/test_tidstore.sql b/src/test/modules/test_tidstore/sql/test_tidstore.sql new file mode 100644 index 0000000000..cc1207821c --- /dev/null +++ b/src/test/modules/test_tidstore/sql/test_tidstore.sql @@ -0,0 +1,58 @@ +CREATE EXTENSION test_tidstore; + +-- Constant values used in the tests. +\set maxblkno 4294967295 +-- The maximum number of heap tuples (MaxHeapTuplesPerPage) in 8kB block is 291. +-- We use a higher number to test tidstore. +\set maxoffset 512 + +-- Support functions. +CREATE FUNCTION make_tid(a bigint, b int2) RETURNS tid +BEGIN ATOMIC +RETURN ('(' || a || ', ' || b || ')')::tid; +END; + +-- Lookup test function. Search 1 to (:maxoffset + 5) offset numbers in +-- 4 blocks, and return TIDS if found in the tidstore. +CREATE FUNCTION lookup_test() RETURNS SETOF tid +BEGIN ATOMIC; +WITH blocks (blk) AS ( +VALUES (0), (2), (:maxblkno - 2), (:maxblkno) +) +SELECT t_ctid + FROM + (SELECT array_agg(make_tid(blk, off::int2)) AS tids + FROM blocks, generate_series(1, :maxoffset + 5) off) AS foo, + LATERAL test_lookup_tids(foo.tids) + WHERE found ORDER BY t_ctid; +END; + +-- Test a local tdistore. A shared tidstore is created by passing true. +SELECT test_create(false); + +-- Test on empty tidstore. +SELECT * + FROM test_lookup_tids(ARRAY[make_tid(0, 1::int2), + make_tid(:maxblkno, :maxoffset::int2)]::tid[]); +SELECT test_is_full(); + +-- Add tids in out of order. 
+WITH blocks (blk) AS( +VALUES (0), (1), (:maxblkno - 1), (:maxblkno / 2), (:maxblkno) +), +offsets (off) AS ( +VALUES (1), (2), (:maxoffset / 2), (:maxoffset - 1), (:maxoffset) +) +SELECT test_set_block_offsets(blk, array_agg(offsets.off)::int2[]) + FROM blocks, offsets + GROUP BY blk; + +-- Lookup test and dump (sorted) tids. +SELECT lookup_test(); +SELECT test_is_full(); +SELECT test_dump_tids(); + +-- cleanup +SELECT test_destroy(); +DROP FUNCTION lookup_test(); +DROP FUNCTION make_tid(a bigint, b int2); diff --git a/src/test/modules/test_tidstore/test_tidstore--1.0.sql b/src/test/modules/test_tidstore/test_tidstore--1.0.sql new file mode 100644 index 0000000000..305459334d --- /dev/null +++ b/src/test/modules/test_tidstore/test_tidstore--1.0.sql @@ -0,0 +1,35 @@ +/* src/test/modules/test_tidstore/test_tidstore--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION test_tidstore" to load this file. \quit + +CREATE FUNCTION test_create( +shared bool) +RETURNS void STRICT PARALLEL UNSAFE +AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION test_set_block_offsets( +blkno bigint, +offsets int2[]) +RETURNS bigint STRICT PARALLEL UNSAFE +AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION test_dump_tids( +t_ctid OUT tid) +RETURNS SETOF tid STRICT PARALLEL UNSAFE +AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION test_lookup_tids( +t_ctids tid[], +t_ctid OUT tid, +found OUT bool) +RETURNS SETOF record STRICT PARALLEL UNSAFE +AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION test_is_full() +RETURNS bool STRICT PARALLEL UNSAFE +AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION test_destroy() +RETURNS void STRICT PARALLEL UNSAFE +AS 'MODULE_PATHNAME' LANGUAGE C; diff --git a/src/test/modules/test_tidstore/test_tidstore.c b/src/test/modules/test_tidstore/test_tidstore.c new file mode 100644 index 0000000000..428d6a3fcf --- /dev/null +++ b/src/test/modules/test_tidstore/test_tidstore.c @@ -0,0 +1,245 @@ 
+/*-------------------------------------------------------------------------- + * + * test_tidstore.c + * Test TidStore data structure. + * + * Copyright (c) 2024, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/test/modules/test_tidstore/test_tidstore.c + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/tidstore.h" +#include "fmgr.h" +#include "funcapi.h" +#include "storage/block.h" +#include "storage/itemptr.h" +#include "storage/lwlock.h" +#include "utils/array.h" +#include "utils/memutils.h" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(test_create); +PG_FUNCTION_INFO_V1(test_set_block_offsets); +PG_FUNCTION_INFO_V1(test_dump_tids); +PG_FUNCTION_INFO_V1(test_lookup_tids); +PG_FUNCTION_INFO_V1(test_is_full); +PG_FUNCTION_INFO_V1(test_destroy); + +static TidStore *tidstore = NULL; +static dsa_area *dsa = NULL; +static int64 num_tids = 0; +static size_t max_bytes = (2 * 1024 * 1024L); /* 2MB */ + +/* + * Create a TidStore. If shared is false, the tidstore is created + * on TopMemoryContext, otherwise on DSA. Although the tidstore + * is created on DSA, only the same process can subsequently use + * the tidstore. The tidstore handle is not shared anywhere. +*/ +Datum +test_create(PG_FUNCTION_ARGS) +{ + bool shared = PG_GETARG_BOOL(0); + MemoryContext old_ctx; + + Assert(tidstore == NULL); + Assert(dsa == NULL); + + old_ctx = MemoryContextSwitchTo(TopMemoryContext); + + if (shared) + { + int tranche_id; + + tranche_id = LWLockNewTrancheId(); + LWLockRegisterTranche(tranche_id, "test_tidstore"); + + dsa = dsa_create(tranche_id); + + /* + * Remain attached until end of backend or explicitly detached so that + * the same process use the tidstore for subsequent tests. 
+ */ + dsa_pin_mapping(dsa); + + tidstore = TidStoreCreate(max_bytes, dsa); + } + else + tidstore = TidStoreCreate(max_bytes, NULL); + + num_tids = 0; + + MemoryContextSwitchTo(old_ctx); + + PG_RETURN_VOID(); +} + +static void +sanity_check_array(ArrayType *ta) +{ + if (ARR_HASNULL(ta) && array_contains_nulls(ta)) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("array must not contain nulls"))); + + if (ARR_NDIM(ta) > 1) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("argument must be empty or one-dimensional array"))); +} + +/* Set the given block and offsets pairs */ +Datum +test_set_block_offsets(PG_FUNCTION_ARGS) +{ + BlockNumber blkno = PG_GETARG_INT64(0); + ArrayType *ta = PG_GETARG_ARRAYTYPE_P_COPY(1); + OffsetNumber *offs; + int noffs; + + sanity_check_array(ta); + + noffs = ArrayGetNItems(ARR_NDIM(ta), ARR_DIMS(ta)); + offs = ((OffsetNumber *) ARR_DATA_PTR(ta)); + + /* Set TIDs */ + TidStoreLockExclusive(tidstore); + TidStoreSetBlockOffsets(tidstore, blkno, offs, noffs); + TidStoreUnlock(tidstore); + + /* Update statistics */ + num_tids += noffs; + + PG_RETURN_INT64(blkno); +} + +/* + * Dump and return TIDs in the tidstore. The output TIDs are ordered. 
+ */ +Datum +test_dump_tids(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + ItemPointerData *tids; + + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext; + TidStoreIter *iter; + TidStoreIterResult *iter_result; + int64 ntids = 0; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + tids = (ItemPointerData *) + palloc0(sizeof(ItemPointerData) * num_tids); + + /* Collect TIDs stored in the tidstore, then finish the iteration */ + TidStoreLockShare(tidstore); + iter = TidStoreBeginIterate(tidstore); + while ((iter_result = TidStoreIterateNext(iter)) != NULL) + { + for (int i = 0; i < iter_result->num_offsets; i++) + ItemPointerSet(&(tids[ntids++]), iter_result->blkno, + iter_result->offsets[i]); + } + TidStoreEndIterate(iter); + TidStoreUnlock(tidstore); + Assert(ntids == num_tids); + + funcctx->user_fctx = tids; + funcctx->max_calls = num_tids; + + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + tids = (ItemPointerData *) funcctx->user_fctx; + + if (funcctx->call_cntr < funcctx->max_calls) + { + int idx; + + /* + * Note that since funcctx->call_cntr is incremented in + * SRF_RETURN_NEXT before return, we need to remember the current + * counter to access the tid array. + */ + idx = funcctx->call_cntr; + SRF_RETURN_NEXT(funcctx, PointerGetDatum(&(tids[idx]))); + } + + SRF_RETURN_DONE(funcctx); +} + +/* + * Test if the given TIDs exist on the tidstore. 
+ */ +Datum +test_lookup_tids(PG_FUNCTION_ARGS) +{ + ArrayType *ta = PG_GETARG_ARRAYTYPE_P_COPY(0); + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + ItemPointer tids; + int ntids; + Datum values[2]; + bool nulls[2] = {false}; + + sanity_check_array(ta); + + InitMaterializedSRF(fcinfo, 0); + + ntids = ArrayGetNItems(ARR_NDIM(ta), ARR_DIMS(ta)); + tids = ((ItemPointer) ARR_DATA_PTR(ta)); + + for (int i = 0; i < ntids; i++) + { + bool found; + ItemPointerData tid = tids[i]; + + TidStoreLockShare(tidstore); + found = TidStoreIsMember(tidstore, &tid); + TidStoreUnlock(tidstore); + + values[0] = ItemPointerGetDatum(&tid); + values[1] = BoolGetDatum(found); + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, + values, nulls); + } + + return (Datum) 0; +} + +/* + * Return true if the size of tidstore reached the maximum memory + * limit. + */ +Datum +test_is_full(PG_FUNCTION_ARGS) +{ + bool is_full; + + is_full = (TidStoreMemoryUsage(tidstore) > max_bytes); + + PG_RETURN_BOOL(is_full); +} + +/* Free the tidstore and reset module state so test_create() can be called again */ +Datum +test_destroy(PG_FUNCTION_ARGS) +{ + TidStoreDestroy(tidstore); + tidstore = NULL; + num_tids = 0; + + if (dsa) + dsa_detach(dsa); + dsa = NULL; + PG_RETURN_VOID(); +} diff --git a/src/test/modules/test_tidstore/test_tidstore.control b/src/test/modules/test_tidstore/test_tidstore.control new file mode 100644 index 0000000000..9b6bd4638f --- /dev/null +++ b/src/test/modules/test_tidstore/test_tidstore.control @@ -0,0 +1,4 @@ +comment = 'Test code for tidstore' +default_version = '1.0' +module_pathname = '$libdir/test_tidstore' +relocatable = true diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index d3a7f75b08..a0fa0d2d1f 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -4056,3 +4056,7 @@ rfile ws_options ws_file_info PathKeyInfo +TidStore +TidStoreIter +TidStoreIterResult +BlocktableEntry -- 2.44.0