From 1085ef0b9b8b31795616abc43063a91b27e7d5a4 Mon Sep 17 00:00:00 2001
From: Masahiko Sawada
Date: Wed, 25 Jan 2023 17:43:29 +0900
Subject: [PATCH v24 5/9] Add read-write lock to radix tree in RT_SHMEM case.

In the RT_SHMEM case, add an LWLock to RT_RADIX_TREE_CONTROL and take it
around every public operation:

  * RT_SET() and RT_DELETE() hold the lock in exclusive mode.
  * RT_SEARCH(), RT_MEMORY_USAGE(), RT_STATS(), RT_DUMP() and
    RT_DUMP_SEARCH() hold the lock in shared mode.
  * RT_BEGIN_ITERATE() acquires the lock in shared mode and keeps it until
    RT_END_ITERATE() is called, so callers must always end an iteration.

RT_CREATE() gains a tranche_id argument (RT_SHMEM only) that is used to
initialize the lock.  Without RT_SHMEM the RT_LOCK_EXCLUSIVE,
RT_LOCK_SHARED and RT_UNLOCK macros expand to ((void) 0).

The lock macros are written as plain expressions (argument parenthesized,
no trailing semicolon) so that they behave like function calls at every
call site.
---
 src/include/lib/radixtree.h                   | 102 ++++++++++++++++--
 .../modules/test_radixtree/test_radixtree.c   |   8 +-
 2 files changed, 100 insertions(+), 10 deletions(-)

diff --git a/src/include/lib/radixtree.h b/src/include/lib/radixtree.h
index f591d903fc..48134b10e4 100644
--- a/src/include/lib/radixtree.h
+++ b/src/include/lib/radixtree.h
@@ -40,6 +40,18 @@
  * There are some optimizations not yet implemented, particularly path
  * compression and lazy path expansion.
  *
+ * To handle concurrency, we use a single reader-writer lock for the radix
+ * tree. The radix tree is exclusively locked during write operations such
+ * as RT_SET() and RT_DELETE(), and shared locked during read operations
+ * such as RT_SEARCH(). An iteration also holds the shared lock on the radix
+ * tree until it is completed.
+ *
+ * TODO: The current locking mechanism is not optimized for high concurrency
+ * with mixed read-write workloads. In the future it might be worthwhile
+ * to replace it with the Optimistic Lock Coupling or ROWEX mentioned in
+ * the paper "The ART of Practical Synchronization" by the same authors as
+ * the ART paper, 2016.
+ *
  * WIP: the radix tree nodes don't shrink.
  *
  * To generate a radix tree and associated functions for a use case several
@@ -224,7 +236,7 @@ typedef dsa_pointer RT_HANDLE;
 #endif
 
 #ifdef RT_SHMEM
-RT_SCOPE RT_RADIX_TREE * RT_CREATE(MemoryContext ctx, dsa_area *dsa);
+RT_SCOPE RT_RADIX_TREE * RT_CREATE(MemoryContext ctx, dsa_area *dsa, int tranche_id);
 RT_SCOPE RT_RADIX_TREE * RT_ATTACH(dsa_area *dsa, dsa_pointer dp);
 RT_SCOPE void RT_DETACH(RT_RADIX_TREE *tree);
 RT_SCOPE RT_HANDLE RT_GET_HANDLE(RT_RADIX_TREE *tree);
@@ -371,6 +383,16 @@ typedef struct RT_NODE
 #define RT_INVALID_PTR_ALLOC NULL
 #endif
 
+#ifdef RT_SHMEM
+#define RT_LOCK_EXCLUSIVE(tree)	LWLockAcquire(&(tree)->ctl->lock, LW_EXCLUSIVE)
+#define RT_LOCK_SHARED(tree)	LWLockAcquire(&(tree)->ctl->lock, LW_SHARED)
+#define RT_UNLOCK(tree)			LWLockRelease(&(tree)->ctl->lock)
+#else
+#define RT_LOCK_EXCLUSIVE(tree)	((void) 0)
+#define RT_LOCK_SHARED(tree)	((void) 0)
+#define RT_UNLOCK(tree)			((void) 0)
+#endif
+
 /*
  * Inner nodes and leaf nodes have analogous structure. To distinguish
  * them at runtime, we take advantage of the fact that the key chunk
@@ -596,6 +618,7 @@ typedef struct RT_RADIX_TREE_CONTROL
 #ifdef RT_SHMEM
 	RT_HANDLE	handle;
 	uint32		magic;
+	LWLock		lock;
 #endif
 
 	RT_PTR_ALLOC root;
@@ -1376,7 +1399,7 @@ RT_NODE_INSERT_LEAF(RT_RADIX_TREE *tree, RT_PTR_LOCAL parent, RT_PTR_ALLOC store
  */
 RT_SCOPE RT_RADIX_TREE *
 #ifdef RT_SHMEM
-RT_CREATE(MemoryContext ctx, dsa_area *dsa)
+RT_CREATE(MemoryContext ctx, dsa_area *dsa, int tranche_id)
 #else
 RT_CREATE(MemoryContext ctx)
 #endif
@@ -1398,6 +1421,7 @@ RT_CREATE(MemoryContext ctx)
 	tree->ctl = (RT_RADIX_TREE_CONTROL *) dsa_get_address(dsa, dp);
 	tree->ctl->handle = dp;
 	tree->ctl->magic = RT_RADIX_TREE_MAGIC;
+	LWLockInitialize(&tree->ctl->lock, tranche_id);
 #else
 	tree->ctl = (RT_RADIX_TREE_CONTROL *) palloc0(sizeof(RT_RADIX_TREE_CONTROL));
 
@@ -1581,6 +1605,8 @@ RT_SET(RT_RADIX_TREE *tree, uint64 key, RT_VALUE_TYPE value)
 	Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
 #endif
 
+	RT_LOCK_EXCLUSIVE(tree);
+
 	/* Empty tree, create the root */
 	if (!RT_PTR_ALLOC_IS_VALID(tree->ctl->root))
 		RT_NEW_ROOT(tree, key);
@@ -1606,6 +1632,7 @@ RT_SET(RT_RADIX_TREE *tree, uint64 key, RT_VALUE_TYPE value)
 		if (!RT_NODE_SEARCH_INNER(child, key, &new_child))
 		{
 			RT_SET_EXTEND(tree, key, value, parent, stored_child, child);
+			RT_UNLOCK(tree);
 			return false;
 		}
 
@@ -1620,12 +1647,13 @@ RT_SET(RT_RADIX_TREE *tree, uint64 key, RT_VALUE_TYPE value)
 	if (!updated)
 		tree->ctl->num_keys++;
 
+	RT_UNLOCK(tree);
 	return updated;
 }
 
 /*
  * Search the given key in the radix tree. Return true if there is the key,
- * otherwise return false. On success, we set the value to *val_p so it must
+ * otherwise return false. On success, we set the value to *value_p so it must
  * not be NULL.
  */
 RT_SCOPE bool
@@ -1633,14 +1661,20 @@ RT_SEARCH(RT_RADIX_TREE *tree, uint64 key, RT_VALUE_TYPE *value_p)
 {
 	RT_PTR_LOCAL node;
 	int			shift;
+	bool		found;
 
 #ifdef RT_SHMEM
 	Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
 #endif
 	Assert(value_p != NULL);
 
+	RT_LOCK_SHARED(tree);
+
 	if (!RT_PTR_ALLOC_IS_VALID(tree->ctl->root) || key > tree->ctl->max_val)
+	{
+		RT_UNLOCK(tree);
 		return false;
+	}
 
 	node = RT_PTR_GET_LOCAL(tree, tree->ctl->root);
 	shift = node->shift;
@@ -1654,13 +1688,19 @@ RT_SEARCH(RT_RADIX_TREE *tree, uint64 key, RT_VALUE_TYPE *value_p)
 			break;
 
 		if (!RT_NODE_SEARCH_INNER(node, key, &child))
+		{
+			RT_UNLOCK(tree);
 			return false;
+		}
 
 		node = RT_PTR_GET_LOCAL(tree, child);
 		shift -= RT_NODE_SPAN;
 	}
 
-	return RT_NODE_SEARCH_LEAF(node, key, value_p);
+	found = RT_NODE_SEARCH_LEAF(node, key, value_p);
+
+	RT_UNLOCK(tree);
+	return found;
 }
 
 #ifdef RT_USE_DELETE
@@ -1682,8 +1722,13 @@ RT_DELETE(RT_RADIX_TREE *tree, uint64 key)
 	Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
 #endif
 
+	RT_LOCK_EXCLUSIVE(tree);
+
 	if (!RT_PTR_ALLOC_IS_VALID(tree->ctl->root) || key > tree->ctl->max_val)
+	{
+		RT_UNLOCK(tree);
 		return false;
+	}
 
 	/*
 	 * Descend the tree to search the key while building a stack of nodes we
@@ -1702,7 +1747,10 @@ RT_DELETE(RT_RADIX_TREE *tree, uint64 key)
 		node = RT_PTR_GET_LOCAL(tree, allocnode);
 
 		if (!RT_NODE_SEARCH_INNER(node, key, &child))
+		{
+			RT_UNLOCK(tree);
 			return false;
+		}
 
 		allocnode = child;
 		shift -= RT_NODE_SPAN;
@@ -1715,6 +1763,7 @@ RT_DELETE(RT_RADIX_TREE *tree, uint64 key)
 	if (!deleted)
 	{
 		/* no key is found in the leaf node */
+		RT_UNLOCK(tree);
 		return false;
 	}
 
@@ -1726,7 +1775,10 @@ RT_DELETE(RT_RADIX_TREE *tree, uint64 key)
 	 * node.
 	 */
 	if (node->count > 0)
+	{
+		RT_UNLOCK(tree);
 		return true;
+	}
 
 	/* Free the empty leaf node */
 	RT_FREE_NODE(tree, allocnode);
@@ -1748,6 +1800,7 @@ RT_DELETE(RT_RADIX_TREE *tree, uint64 key)
 		RT_FREE_NODE(tree, allocnode);
 	}
 
+	RT_UNLOCK(tree);
 	return true;
 }
 #endif
@@ -1812,7 +1865,12 @@ RT_UPDATE_ITER_STACK(RT_ITER *iter, RT_PTR_LOCAL from_node, int from)
 	}
 }
 
-/* Create and return the iterator for the given radix tree */
+/*
+ * Create and return the iterator for the given radix tree.
+ *
+ * The radix tree is locked in shared mode during the iteration, so
+ * RT_END_ITERATE needs to be called when finished to release the lock.
+ */
 RT_SCOPE RT_ITER *
 RT_BEGIN_ITERATE(RT_RADIX_TREE *tree)
 {
@@ -1826,6 +1884,8 @@ RT_BEGIN_ITERATE(RT_RADIX_TREE *tree)
 	iter = (RT_ITER *) palloc0(sizeof(RT_ITER));
 	iter->tree = tree;
 
+	RT_LOCK_SHARED(tree);
+
 	/* empty tree */
 	if (!iter->tree->ctl->root)
 		return iter;
@@ -1846,7 +1906,7 @@ RT_BEGIN_ITERATE(RT_RADIX_TREE *tree)
 }
 
 /*
- * Return true with setting key_p and value_p if there is next key. Otherwise,
+ * Return true with setting key_p and value_p if there is next key. Otherwise
  * return false.
  */
 RT_SCOPE bool
@@ -1901,9 +1961,20 @@ RT_ITERATE_NEXT(RT_ITER *iter, uint64 *key_p, RT_VALUE_TYPE *value_p)
 	return false;
 }
 
+/*
+ * Terminate the iteration and release the lock.
+ *
+ * This function needs to be called after finishing or when exiting an
+ * iteration.
+ */
 RT_SCOPE void
 RT_END_ITERATE(RT_ITER *iter)
 {
+#ifdef RT_SHMEM
+	Assert(LWLockHeldByMe(&iter->tree->ctl->lock));
+#endif
+
+	RT_UNLOCK(iter->tree);
 	pfree(iter);
 }
 
@@ -1915,6 +1986,8 @@ RT_MEMORY_USAGE(RT_RADIX_TREE *tree)
 {
 	Size		total = 0;
 
+	RT_LOCK_SHARED(tree);
+
 #ifdef RT_SHMEM
 	Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
 	total = dsa_get_total_size(tree->dsa);
@@ -1926,6 +1999,7 @@ RT_MEMORY_USAGE(RT_RADIX_TREE *tree)
 	}
 #endif
 
+	RT_UNLOCK(tree);
 	return total;
 }
 
@@ -2010,6 +2084,8 @@ RT_VERIFY_NODE(RT_PTR_LOCAL node)
 RT_SCOPE void
 RT_STATS(RT_RADIX_TREE *tree)
 {
+	RT_LOCK_SHARED(tree);
+
 	fprintf(stderr, "max_val = " UINT64_FORMAT "\n", tree->ctl->max_val);
 	fprintf(stderr, "num_keys = " UINT64_FORMAT "\n", tree->ctl->num_keys);
 
@@ -2029,6 +2105,8 @@ RT_STATS(RT_RADIX_TREE *tree)
 				tree->ctl->cnt[RT_CLASS_125],
 				tree->ctl->cnt[RT_CLASS_256]);
 	}
+
+	RT_UNLOCK(tree);
 }
 
 static void
@@ -2222,14 +2300,18 @@ RT_DUMP_SEARCH(RT_RADIX_TREE *tree, uint64 key)
 
 	RT_STATS(tree);
 
+	RT_LOCK_SHARED(tree);
+
 	if (!RT_PTR_ALLOC_IS_VALID(tree->ctl->root))
 	{
+		RT_UNLOCK(tree);
 		fprintf(stderr, "empty tree\n");
 		return;
 	}
 
 	if (key > tree->ctl->max_val)
 	{
+		RT_UNLOCK(tree);
 		fprintf(stderr, "key " UINT64_FORMAT "(0x" RT_UINT64_FORMAT_HEX ") is larger than max val\n",
 				key, key);
 		return;
@@ -2263,6 +2345,7 @@ RT_DUMP_SEARCH(RT_RADIX_TREE *tree, uint64 key)
 		shift -= RT_NODE_SPAN;
 		level++;
 	}
+	RT_UNLOCK(tree);
 
 	fprintf(stderr, "%s", buf.data);
 }
@@ -2274,8 +2357,11 @@ RT_DUMP(RT_RADIX_TREE *tree)
 
 	RT_STATS(tree);
 
+	RT_LOCK_SHARED(tree);
+
 	if (!RT_PTR_ALLOC_IS_VALID(tree->ctl->root))
 	{
+		RT_UNLOCK(tree);
 		fprintf(stderr, "empty tree\n");
 		return;
 	}
@@ -2283,6 +2369,7 @@ RT_DUMP(RT_RADIX_TREE *tree)
 	initStringInfo(&buf);
 
 	RT_DUMP_NODE(tree, tree->ctl->root, 0, true, &buf);
+	RT_UNLOCK(tree);
 
 	fprintf(stderr, "%s",buf.data);
 }
@@ -2310,6 +2397,9 @@ RT_DUMP(RT_RADIX_TREE *tree)
 #undef RT_GET_KEY_CHUNK
 #undef BM_IDX
 #undef BM_BIT
+#undef RT_LOCK_EXCLUSIVE
+#undef RT_LOCK_SHARED
+#undef RT_UNLOCK
 #undef RT_NODE_IS_LEAF
 #undef RT_NODE_MUST_GROW
 #undef RT_NODE_KIND_COUNT
diff --git a/src/test/modules/test_radixtree/test_radixtree.c b/src/test/modules/test_radixtree/test_radixtree.c
index 2a93e731ae..bbe1a619b6 100644
--- a/src/test/modules/test_radixtree/test_radixtree.c
+++ b/src/test/modules/test_radixtree/test_radixtree.c
@@ -144,7 +144,7 @@ test_empty(void)
 	dsa_area   *dsa;
 
 	dsa = dsa_create(tranche_id);
-	radixtree = rt_create(CurrentMemoryContext, dsa);
+	radixtree = rt_create(CurrentMemoryContext, dsa, tranche_id);
 #else
 	radixtree = rt_create(CurrentMemoryContext);
 #endif
@@ -195,7 +195,7 @@ test_basic(int children, bool test_inner)
 		 test_inner ? "inner" : "leaf", children);
 
 #ifdef RT_SHMEM
-	radixtree = rt_create(CurrentMemoryContext, dsa);
+	radixtree = rt_create(CurrentMemoryContext, dsa, tranche_id);
 #else
 	radixtree = rt_create(CurrentMemoryContext);
 #endif
@@ -363,7 +363,7 @@ test_node_types(uint8 shift)
 	elog(NOTICE, "testing radix tree node types with shift \"%d\"", shift);
 
 #ifdef RT_SHMEM
-	radixtree = rt_create(CurrentMemoryContext, dsa);
+	radixtree = rt_create(CurrentMemoryContext, dsa, tranche_id);
 #else
 	radixtree = rt_create(CurrentMemoryContext);
 #endif
@@ -434,7 +434,7 @@ test_pattern(const test_spec * spec)
 	MemoryContextSetIdentifier(radixtree_ctx, spec->test_name);
 
 #ifdef RT_SHMEM
-	radixtree = rt_create(radixtree_ctx, dsa);
+	radixtree = rt_create(radixtree_ctx, dsa, tranche_id);
 #else
 	radixtree = rt_create(radixtree_ctx);
 #endif
-- 
2.31.1