From 78faaad01a69a5a81eb219e3f45983c1b466e173 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Sat, 21 Jan 2023 12:52:53 +0700 Subject: [PATCH v22 11/22] Expand commentary for kinds vs. size classes Also move class enum closer to array and add #undef's --- src/include/lib/radixtree.h | 76 ++++++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 23 deletions(-) diff --git a/src/include/lib/radixtree.h b/src/include/lib/radixtree.h index 6cc8442c89..4a2dad82bf 100644 --- a/src/include/lib/radixtree.h +++ b/src/include/lib/radixtree.h @@ -288,22 +288,26 @@ RT_SCOPE void RT_STATS(RT_RADIX_TREE *tree); #define BM_BIT(x) ((x) % BITS_PER_BITMAPWORD) /* - * Supported radix tree node kinds and size classes. + * Node kinds * - * There are 4 node kinds and each node kind have one or two size classes, - * partial and full. The size classes in the same node kind have the same - * node structure but have the different number of fanout that is stored - * in 'fanout' of RT_NODE. For example in size class 15, when a 16th element - * is to be inserted, we allocate a larger area and memcpy the entire old - * node to it. + * The different node kinds are what make the tree "adaptive". * - * This technique allows us to limit the node kinds to 4, which limits the - * number of cases in switch statements. It also allows a possible future - * optimization to encode the node kind in a pointer tag. + * Each node kind is associated with a different datatype and different + * search/set/delete/iterate algorithms adapted for its size. The largest + * kind, node256, is basically the same as a traditional radix tree, + * and would be most wasteful of memory when sparsely populated. The + * smaller nodes expend some additional CPU time to enable a smaller + * memory footprint. * - * These size classes have been chose carefully so that it minimizes the - * allocator padding in both the inner and leaf nodes on DSA. 
- * node + * XXX There are 4 node kinds, and this should never be increased, + * for several reasons: + * 1. With 5 or more kinds, gcc tends to use a jump table for switch + * statements. + * 2. The 4 kinds can be represented with 2 bits, so we have the option + * in the future to tag the node pointer with the kind, even on + * platforms with 32-bit pointers. This might speed up node traversal + * in trees with highly random node kinds. + * 3. We can have multiple size classes per node kind. */ #define RT_NODE_KIND_3 0x00 #define RT_NODE_KIND_32 0x01 @@ -320,16 +324,6 @@ RT_SCOPE void RT_STATS(RT_RADIX_TREE *tree); #endif /* RT_COMMON */ - -typedef enum RT_SIZE_CLASS -{ - RT_CLASS_3_FULL = 0, - RT_CLASS_32_PARTIAL, - RT_CLASS_32_FULL, - RT_CLASS_125_FULL, - RT_CLASS_256 -} RT_SIZE_CLASS; - /* Common type for all nodes types */ typedef struct RT_NODE { @@ -508,6 +502,37 @@ typedef struct RT_NODE_LEAF_256 RT_VALUE_TYPE values[RT_NODE_MAX_SLOTS]; } RT_NODE_LEAF_256; +/* + * Node size classes + * + * Nodes of different kinds necessarily belong to different size classes. + * The main innovation in our implementation compared to the ART paper + * is decoupling the notion of size class from kind. + * + * The size classes within a given node kind have the same underlying + * type, but a variable number of children/values. This is possible + * because the base type contains small fixed data structures that + * work the same way regardless of how full the node is. We store the + * node's allocated capacity in the "fanout" member of RT_NODE, to allow + * runtime introspection. + * + * Growing from one node kind to another requires special code for each + * case, but growing from one size class to another within the same kind + * is basically just allocate + memcpy. + * + * The size classes have been chosen so that inner nodes on platforms + * with 64-bit pointers (and leaf nodes when using a 64-bit key) are + * equal to or slightly smaller than some DSA size class. 
+ */ +typedef enum RT_SIZE_CLASS +{ + RT_CLASS_3_FULL = 0, + RT_CLASS_32_PARTIAL, + RT_CLASS_32_FULL, + RT_CLASS_125_FULL, + RT_CLASS_256 +} RT_SIZE_CLASS; + /* Information for each size class */ typedef struct RT_SIZE_CLASS_ELEM { @@ -2217,6 +2242,7 @@ RT_DUMP(RT_RADIX_TREE *tree) #undef NODE_IS_EMPTY #undef VAR_NODE_HAS_FREE_SLOT #undef FIXED_NODE_HAS_FREE_SLOT +#undef RT_NODE_KIND_COUNT #undef RT_SIZE_CLASS_COUNT #undef RT_RADIX_TREE_MAGIC @@ -2229,6 +2255,10 @@ RT_DUMP(RT_RADIX_TREE *tree) #undef RT_ITER #undef RT_NODE #undef RT_NODE_ITER +#undef RT_NODE_KIND_3 +#undef RT_NODE_KIND_32 +#undef RT_NODE_KIND_125 +#undef RT_NODE_KIND_256 #undef RT_NODE_BASE_3 #undef RT_NODE_BASE_32 #undef RT_NODE_BASE_125 -- 2.39.0