From 989dd2cb442c1c2a6182bb5f7785c52f4d5cdb5e Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Mon, 17 Apr 2023 17:33:21 +0900 Subject: [PATCH v32 09/18] radix tree: Review tree iteration code Clean up the routines and improve comments and variable names. --- src/include/lib/radixtree.h | 152 ++++++++++++++------------ src/include/lib/radixtree_iter_impl.h | 85 +++++++------- 2 files changed, 118 insertions(+), 119 deletions(-) diff --git a/src/include/lib/radixtree.h b/src/include/lib/radixtree.h index 088d1dfd9d..8bea606c62 100644 --- a/src/include/lib/radixtree.h +++ b/src/include/lib/radixtree.h @@ -83,7 +83,7 @@ * RT_SET - Set a key-value pair * RT_BEGIN_ITERATE - Begin iterating through all key-value pairs * RT_ITERATE_NEXT - Return next key-value pair, if any - * RT_END_ITER - End iteration + * RT_END_ITERATE - End iteration * RT_MEMORY_USAGE - Get the memory usage * * Interface for Shared Memory @@ -191,7 +191,7 @@ #define RT_NODE_INSERT_LEAF RT_MAKE_NAME(node_insert_leaf) #define RT_NODE_INNER_ITERATE_NEXT RT_MAKE_NAME(node_inner_iterate_next) #define RT_NODE_LEAF_ITERATE_NEXT RT_MAKE_NAME(node_leaf_iterate_next) -#define RT_UPDATE_ITER_STACK RT_MAKE_NAME(update_iter_stack) +#define RT_ITER_SET_NODE_FROM RT_MAKE_NAME(iter_set_node_from) #define RT_ITER_UPDATE_KEY RT_MAKE_NAME(iter_update_key) #define RT_VERIFY_NODE RT_MAKE_NAME(verify_node) @@ -650,36 +650,40 @@ typedef struct RT_RADIX_TREE * Iteration support. * * Iterating the radix tree returns each pair of key and value in the ascending - * order of the key. To support this, the we iterate nodes of each level. + * order of the key. * - * RT_NODE_ITER struct is used to track the iteration within a node. + * RT_NODE_ITER is the struct for iteration of one radix tree node. * * RT_ITER is the struct for iteration of the radix tree, and uses RT_NODE_ITER - * in order to track the iteration of each level. 
During iteration, we also - * construct the key whenever updating the node iteration information, e.g., when - * advancing the current index within the node or when moving to the next node - * at the same level. - * - * XXX: Currently we allow only one process to do iteration. Therefore, rt_node_iter - * has the local pointers to nodes, rather than RT_PTR_ALLOC. - * We need either a safeguard to disallow other processes to begin the iteration - * while one process is doing or to allow multiple processes to do the iteration. + * for each level to track the iteration within the node. */ typedef struct RT_NODE_ITER { - RT_PTR_LOCAL node; /* current node being iterated */ - int current_idx; /* current position. -1 for initial value */ + /* + * Local pointer to the node we are iterating over. + * + * Since the radix tree doesn't support the shared iteration among multiple + * processes, we use RT_PTR_LOCAL rather than RT_PTR_ALLOC. + */ + RT_PTR_LOCAL node; + + /* + * The next index of the chunk array in RT_NODE_KIND_3 and + * RT_NODE_KIND_32 nodes, or the next chunk in RT_NODE_KIND_125 and + * RT_NODE_KIND_256 nodes. 0 for the initial value. + */ + int idx; } RT_NODE_ITER; typedef struct RT_ITER { RT_RADIX_TREE *tree; - /* Track the iteration on nodes of each level */ - RT_NODE_ITER stack[RT_MAX_LEVEL]; - int stack_len; + /* Track the nodes for each level. level = 0 is for a leaf node */ + RT_NODE_ITER node_iters[RT_MAX_LEVEL]; + int top_level; - /* The key is constructed during iteration */ + /* The key constructed during the iteration */ uint64 key; } RT_ITER; @@ -1804,16 +1808,9 @@ RT_DELETE(RT_RADIX_TREE *tree, uint64 key) } #endif -static inline void -RT_ITER_UPDATE_KEY(RT_ITER *iter, uint8 chunk, uint8 shift) -{ - iter->key &= ~(((uint64) RT_CHUNK_MASK) << shift); - iter->key |= (((uint64) chunk) << shift); -} - /* - * Advance the slot in the inner node. Return the child if exists, otherwise - * null. 
+ * Scan the inner node and return the next child node if one exists, + * otherwise return NULL. */ static inline RT_PTR_LOCAL RT_NODE_INNER_ITERATE_NEXT(RT_ITER *iter, RT_NODE_ITER *node_iter) @@ -1824,8 +1821,8 @@ RT_NODE_INNER_ITERATE_NEXT(RT_ITER *iter, RT_NODE_ITER *node_iter) } /* - * Advance the slot in the leaf node. On success, return true and the value - * is set to value_p, otherwise return false. + * Scan the leaf node. If a next value exists, set it to value_p and return + * true. Otherwise return false. */ static inline bool RT_NODE_LEAF_ITERATE_NEXT(RT_ITER *iter, RT_NODE_ITER *node_iter, @@ -1837,29 +1834,50 @@ RT_NODE_LEAF_ITERATE_NEXT(RT_ITER *iter, RT_NODE_ITER *node_iter, } /* - * Update each node_iter for inner nodes in the iterator node stack. + * While descending the radix tree from the 'from' node to the bottom, we + * set the next node to iterate for each level. */ static void -RT_UPDATE_ITER_STACK(RT_ITER *iter, RT_PTR_LOCAL from_node, int from) +RT_ITER_SET_NODE_FROM(RT_ITER *iter, RT_PTR_LOCAL from) { - int level = from; - RT_PTR_LOCAL node = from_node; + int level = from->shift / RT_NODE_SPAN; + RT_PTR_LOCAL node = from; for (;;) { - RT_NODE_ITER *node_iter = &(iter->stack[level--]); + RT_NODE_ITER *node_iter = &(iter->node_iters[level--]); + +#ifdef USE_ASSERT_CHECKING + if (node_iter->node) + { + /* We must have finished the iteration on the previous node */ + if (RT_NODE_IS_LEAF(node_iter->node)) + { + RT_VALUE_TYPE dummy; + Assert(!RT_NODE_LEAF_ITERATE_NEXT(iter, node_iter, &dummy)); + } + else + Assert(!RT_NODE_INNER_ITERATE_NEXT(iter, node_iter)); + } +#endif + /* Set the node to the node iterator of this level */ node_iter->node = node; - node_iter->current_idx = -1; + node_iter->idx = 0; - /* We don't advance the leaf node iterator here */ if (RT_NODE_IS_LEAF(node)) - return; + { + /* The leaf node is visited by RT_ITERATE_NEXT() */ + break; + } - /* Advance to the next slot in the inner node */ + /* + * Get the first child node 
from the node, which corresponds to the + * lowest chunk within the node. + */ node = RT_NODE_INNER_ITERATE_NEXT(iter, node_iter); - /* We must find the first children in the node */ + /* The first child must be found */ Assert(node); } } @@ -1873,14 +1891,11 @@ RT_UPDATE_ITER_STACK(RT_ITER *iter, RT_PTR_LOCAL from_node, int from) RT_SCOPE RT_ITER * RT_BEGIN_ITERATE(RT_RADIX_TREE *tree) { - MemoryContext old_ctx; RT_ITER *iter; RT_PTR_LOCAL root; - int top_level; - old_ctx = MemoryContextSwitchTo(tree->context); - - iter = (RT_ITER *) palloc0(sizeof(RT_ITER)); + iter = (RT_ITER *) MemoryContextAllocZero(tree->context, + sizeof(RT_ITER)); iter->tree = tree; RT_LOCK_SHARED(tree); @@ -1890,16 +1905,13 @@ RT_BEGIN_ITERATE(RT_RADIX_TREE *tree) return iter; root = RT_PTR_GET_LOCAL(tree, iter->tree->ctl->root); - top_level = root->shift / RT_NODE_SPAN; - iter->stack_len = top_level; + iter->top_level = root->shift / RT_NODE_SPAN; /* - * Descend to the left most leaf node from the root. The key is being - * constructed while descending to the leaf. + * Set the next node to iterate for each level from the level of the + * root node. 
*/ - RT_UPDATE_ITER_STACK(iter, root, top_level); - - MemoryContextSwitchTo(old_ctx); + RT_ITER_SET_NODE_FROM(iter, root); return iter; } @@ -1911,6 +1923,8 @@ RT_BEGIN_ITERATE(RT_RADIX_TREE *tree) RT_SCOPE bool RT_ITERATE_NEXT(RT_ITER *iter, uint64 *key_p, RT_VALUE_TYPE *value_p) { + Assert(value_p != NULL); + /* Empty tree */ if (!iter->tree->ctl->root) return false; @@ -1918,43 +1932,38 @@ RT_ITERATE_NEXT(RT_ITER *iter, uint64 *key_p, RT_VALUE_TYPE *value_p) for (;;) { RT_PTR_LOCAL child = NULL; - RT_VALUE_TYPE value; - int level; - bool found; - - /* Advance the leaf node iterator to get next key-value pair */ - found = RT_NODE_LEAF_ITERATE_NEXT(iter, &(iter->stack[0]), &value); - if (found) + /* Get the next chunk of the leaf node */ + if (RT_NODE_LEAF_ITERATE_NEXT(iter, &(iter->node_iters[0]), value_p)) { *key_p = iter->key; - *value_p = value; return true; } /* - * We've visited all values in the leaf node, so advance inner node - * iterators from the level=1 until we find the next child node. + * We've visited all values in the leaf node, so advance all inner node + * iterators by visiting inner nodes from the level = 1 until we find the + * next inner node that has a child node. */ - for (level = 1; level <= iter->stack_len; level++) + for (int level = 1; level <= iter->top_level; level++) { - child = RT_NODE_INNER_ITERATE_NEXT(iter, &(iter->stack[level])); + child = RT_NODE_INNER_ITERATE_NEXT(iter, &(iter->node_iters[level])); if (child) break; } - /* the iteration finished */ + /* We've visited all nodes, so the iteration finished */ if (!child) - return false; + break; /* - * Set the node to the node iterator and update the iterator stack - * from this node. + * Found the new child node. We update the next node to iterate for each + * level from the level of this child node. 
*/ - RT_UPDATE_ITER_STACK(iter, child, level - 1); + RT_ITER_SET_NODE_FROM(iter, child); - /* Node iterators are updated, so try again from the leaf */ + /* Find key-value from the leaf node again */ } return false; @@ -2508,8 +2517,7 @@ RT_DUMP(RT_RADIX_TREE *tree) #undef RT_NODE_INSERT_LEAF #undef RT_NODE_INNER_ITERATE_NEXT #undef RT_NODE_LEAF_ITERATE_NEXT -#undef RT_UPDATE_ITER_STACK -#undef RT_ITER_UPDATE_KEY +#undef RT_ITER_SET_NODE_FROM #undef RT_VERIFY_NODE #undef RT_DEBUG diff --git a/src/include/lib/radixtree_iter_impl.h b/src/include/lib/radixtree_iter_impl.h index 98c78eb237..5c1034768e 100644 --- a/src/include/lib/radixtree_iter_impl.h +++ b/src/include/lib/radixtree_iter_impl.h @@ -27,12 +27,10 @@ #error node level must be either inner or leaf #endif - bool found = false; - uint8 key_chunk; + uint8 key_chunk = 0; #ifdef RT_NODE_LEVEL_LEAF - RT_VALUE_TYPE value; - + Assert(value_p != NULL); Assert(RT_NODE_IS_LEAF(node_iter->node)); #else RT_PTR_LOCAL child = NULL; @@ -50,99 +48,92 @@ { RT_NODE3_TYPE *n3 = (RT_NODE3_TYPE *) node_iter->node; - node_iter->current_idx++; - if (node_iter->current_idx >= n3->base.n.count) - break; + if (node_iter->idx >= n3->base.n.count) + return false; + #ifdef RT_NODE_LEVEL_LEAF - value = n3->values[node_iter->current_idx]; + *value_p = n3->values[node_iter->idx]; #else - child = RT_PTR_GET_LOCAL(iter->tree, n3->children[node_iter->current_idx]); + child = RT_PTR_GET_LOCAL(iter->tree, n3->children[node_iter->idx]); #endif - key_chunk = n3->base.chunks[node_iter->current_idx]; - found = true; + key_chunk = n3->base.chunks[node_iter->idx]; + node_iter->idx++; break; } case RT_NODE_KIND_32: { RT_NODE32_TYPE *n32 = (RT_NODE32_TYPE *) node_iter->node; - node_iter->current_idx++; - if (node_iter->current_idx >= n32->base.n.count) - break; + if (node_iter->idx >= n32->base.n.count) + return false; #ifdef RT_NODE_LEVEL_LEAF - value = n32->values[node_iter->current_idx]; + *value_p = n32->values[node_iter->idx]; #else - child = 
RT_PTR_GET_LOCAL(iter->tree, n32->children[node_iter->current_idx]); + child = RT_PTR_GET_LOCAL(iter->tree, n32->children[node_iter->idx]); #endif - key_chunk = n32->base.chunks[node_iter->current_idx]; - found = true; + key_chunk = n32->base.chunks[node_iter->idx]; + node_iter->idx++; break; } case RT_NODE_KIND_125: { RT_NODE125_TYPE *n125 = (RT_NODE125_TYPE *) node_iter->node; - int i; + int chunk; - for (i = node_iter->current_idx + 1; i < RT_NODE_MAX_SLOTS; i++) + for (chunk = node_iter->idx; chunk < RT_NODE_MAX_SLOTS; chunk++) { - if (RT_NODE_125_IS_CHUNK_USED((RT_NODE_BASE_125 *) n125, i)) + if (RT_NODE_125_IS_CHUNK_USED((RT_NODE_BASE_125 *) n125, chunk)) break; } - if (i >= RT_NODE_MAX_SLOTS) - break; + if (chunk >= RT_NODE_MAX_SLOTS) + return false; - node_iter->current_idx = i; #ifdef RT_NODE_LEVEL_LEAF - value = RT_NODE_LEAF_125_GET_VALUE(n125, i); + *value_p = RT_NODE_LEAF_125_GET_VALUE(n125, chunk); #else - child = RT_PTR_GET_LOCAL(iter->tree, RT_NODE_INNER_125_GET_CHILD(n125, i)); + child = RT_PTR_GET_LOCAL(iter->tree, RT_NODE_INNER_125_GET_CHILD(n125, chunk)); #endif - key_chunk = i; - found = true; + key_chunk = chunk; + node_iter->idx = chunk + 1; break; } case RT_NODE_KIND_256: { RT_NODE256_TYPE *n256 = (RT_NODE256_TYPE *) node_iter->node; - int i; + int chunk; - for (i = node_iter->current_idx + 1; i < RT_NODE_MAX_SLOTS; i++) + for (chunk = node_iter->idx; chunk < RT_NODE_MAX_SLOTS; chunk++) { #ifdef RT_NODE_LEVEL_LEAF - if (RT_NODE_LEAF_256_IS_CHUNK_USED(n256, i)) + if (RT_NODE_LEAF_256_IS_CHUNK_USED(n256, chunk)) #else - if (RT_NODE_INNER_256_IS_CHUNK_USED(n256, i)) + if (RT_NODE_INNER_256_IS_CHUNK_USED(n256, chunk)) #endif break; } - if (i >= RT_NODE_MAX_SLOTS) - break; + if (chunk >= RT_NODE_MAX_SLOTS) + return false; - node_iter->current_idx = i; #ifdef RT_NODE_LEVEL_LEAF - value = RT_NODE_LEAF_256_GET_VALUE(n256, i); + *value_p = RT_NODE_LEAF_256_GET_VALUE(n256, chunk); #else - child = RT_PTR_GET_LOCAL(iter->tree, 
RT_NODE_INNER_256_GET_CHILD(n256, i)); + child = RT_PTR_GET_LOCAL(iter->tree, RT_NODE_INNER_256_GET_CHILD(n256, chunk)); #endif - key_chunk = i; - found = true; + key_chunk = chunk; + node_iter->idx = chunk + 1; break; } } - if (found) - { - RT_ITER_UPDATE_KEY(iter, key_chunk, node_iter->node->shift); -#ifdef RT_NODE_LEVEL_LEAF - *value_p = value; -#endif - } + /* Update the part of the key */ + iter->key &= ~(((uint64) RT_CHUNK_MASK) << node_iter->node->shift); + iter->key |= (((uint64) key_chunk) << node_iter->node->shift); #ifdef RT_NODE_LEVEL_LEAF - return found; + return true; #else return child; #endif -- 2.31.1