From 503ccef8841efcb0809acc73f6f4cc2428342080 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Sat, 21 Jan 2023 14:21:55 +0700 Subject: [PATCH v21 14/22] Add some comments for insert logic --- src/include/lib/radixtree.h | 29 ++++++++++++++++++++++--- src/include/lib/radixtree_insert_impl.h | 5 +++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/include/lib/radixtree.h b/src/include/lib/radixtree.h index 567eab4bc8..d48c915373 100644 --- a/src/include/lib/radixtree.h +++ b/src/include/lib/radixtree.h @@ -731,8 +731,8 @@ RT_NODE_3_GET_INSERTPOS(RT_NODE_BASE_3 *node, uint8 chunk) } /* - * Return index of the first element in 'base' that equals 'key'. Return -1 - * if there is no such element. + * Return index of the first element in the node's chunk array that equals + * 'chunk'. Return -1 if there is no such element. */ static inline int RT_NODE_32_SEARCH_EQ(RT_NODE_BASE_32 *node, uint8 chunk) @@ -762,14 +762,22 @@ RT_NODE_32_SEARCH_EQ(RT_NODE_BASE_32 *node, uint8 chunk) #endif #ifndef USE_NO_SIMD + /* replicate the search key */ spread_chunk = vector8_broadcast(chunk); + + /* compare to the 32 keys stored in the node */ vector8_load(&haystack1, &node->chunks[0]); vector8_load(&haystack2, &node->chunks[sizeof(Vector8)]); cmp1 = vector8_eq(spread_chunk, haystack1); cmp2 = vector8_eq(spread_chunk, haystack2); + + /* convert comparison to a bitfield */ bitfield = vector8_highbit_mask(cmp1) | (vector8_highbit_mask(cmp2) << sizeof(Vector8)); + + /* mask off invalid entries */ bitfield &= ((UINT64CONST(1) << count) - 1); + /* convert bitfield to index by counting trailing zeros */ if (bitfield) index_simd = pg_rightmost_one_pos32(bitfield); @@ -781,7 +789,8 @@ RT_NODE_32_SEARCH_EQ(RT_NODE_BASE_32 *node, uint8 chunk) } /* - * Return index of the chunk to insert into chunks in the given node. + * Return index of the node's chunk array to insert into, + * such that the chunk array remains ordered. 
 */ static inline int RT_NODE_32_GET_INSERTPOS(RT_NODE_BASE_32 *node, uint8 chunk) @@ -804,12 +813,26 @@ RT_NODE_32_GET_INSERTPOS(RT_NODE_BASE_32 *node, uint8 chunk) for (index = 0; index < count; index++) { + /* + * This is coded with '>=' to match what we can do with SIMD, + * with an assert to keep us honest. + */ if (node->chunks[index] >= chunk) + { + Assert(node->chunks[index] != chunk); break; + } } #endif #ifndef USE_NO_SIMD + /* + * This is a bit more complicated than RT_NODE_32_SEARCH_EQ(), because + * no unsigned 8-bit ordering comparison instruction exists, at least for SSE2. So + * we need to play some trickery using vector8_min() to effectively get + * <=. There'll never be any equal elements in the current uses, but that's + * what we get here... + */ spread_chunk = vector8_broadcast(chunk); vector8_load(&haystack1, &node->chunks[0]); vector8_load(&haystack2, &node->chunks[sizeof(Vector8)]); diff --git a/src/include/lib/radixtree_insert_impl.h b/src/include/lib/radixtree_insert_impl.h index 16461bdb03..8470c8fc70 100644 --- a/src/include/lib/radixtree_insert_impl.h +++ b/src/include/lib/radixtree_insert_impl.h @@ -162,6 +162,11 @@ #endif } + /* + * Since we just copied a dense array, we can set the bits + * using a single store, provided the length of that array + * is at most the number of bits in a bitmapword. + */ Assert(class32_max.fanout <= sizeof(bitmapword) * BITS_PER_BYTE); new125->base.isset[0] = (bitmapword) (((uint64) 1 << class32_max.fanout) - 1); -- 2.39.0