From 2de9b5bc18bfa169b3ba3507b6bdf79d277c0ad4 Mon Sep 17 00:00:00 2001 From: Nazir Bilal Yavuz Date: Fri, 13 Feb 2026 13:36:34 +0300 Subject: [PATCH v7 2/2] Use 4 vectors in CopyReadLineText() SIMD --- src/backend/commands/copyfromparse.c | 116 +++++++++++++++++++++------ 1 file changed, 92 insertions(+), 24 deletions(-) diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index 4a127d1af90..caadc40cc8b 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -1361,6 +1361,9 @@ CopyReadLineText(CopyFromState cstate, bool is_csv, bool simd_enabled) * escape). This is faster than byte-by-byte iteration, especially on * large buffers. * + * For better instruction-level parallelism, we try to process four + * vectors at a time. + * * We do not apply the SIMD fast path in either of the following * cases: - When the previously processed character was an escape * character (last_was_esc), since the next byte must be examined @@ -1373,53 +1376,118 @@ CopyReadLineText(CopyFromState cstate, bool is_csv, bool simd_enabled) * the rest of the input once we encounter a special character which * is neither EOF nor EOL. 
*/ - if (simd_enabled && !last_was_esc && copy_buf_len - input_buf_ptr > sizeof(Vector8)) + if (simd_enabled && !last_was_esc && copy_buf_len - input_buf_ptr >= 4 * sizeof(Vector8)) { - Vector8 chunk; - Vector8 match = vector8_broadcast(0); - uint32 mask; - - /* Load a chunk of data into a vector register */ - vector8_load(&chunk, (const uint8 *) &copy_input_buf[input_buf_ptr]); + Vector8 chunk1, + chunk2, + chunk3, + chunk4; + Vector8 match1, + match2, + match3, + match4; + Vector8 tmp1, + tmp2, + result; + + /* Load four chunks of data into vector registers */ + vector8_load(&chunk1, (const uint8 *) &copy_input_buf[input_buf_ptr]); + vector8_load(&chunk2, (const uint8 *) &copy_input_buf[input_buf_ptr + sizeof(Vector8)]); + vector8_load(&chunk3, (const uint8 *) &copy_input_buf[input_buf_ptr + 2 * sizeof(Vector8)]); + vector8_load(&chunk4, (const uint8 *) &copy_input_buf[input_buf_ptr + 3 * sizeof(Vector8)]); if (is_csv) { /* \n and \r are not special inside quotes */ if (!in_quote) - match = vector8_or(vector8_eq(chunk, nl), vector8_eq(chunk, cr)); + { + match1 = vector8_or(vector8_eq(chunk1, nl), vector8_eq(chunk1, cr)); + match2 = vector8_or(vector8_eq(chunk2, nl), vector8_eq(chunk2, cr)); + match3 = vector8_or(vector8_eq(chunk3, nl), vector8_eq(chunk3, cr)); + match4 = vector8_or(vector8_eq(chunk4, nl), vector8_eq(chunk4, cr)); + } + else + { + match1 = vector8_broadcast(0); + match2 = vector8_broadcast(0); + match3 = vector8_broadcast(0); + match4 = vector8_broadcast(0); + } - match = vector8_or(match, vector8_eq(chunk, quote)); + match1 = vector8_or(match1, vector8_eq(chunk1, quote)); + match2 = vector8_or(match2, vector8_eq(chunk2, quote)); + match3 = vector8_or(match3, vector8_eq(chunk3, quote)); + match4 = vector8_or(match4, vector8_eq(chunk4, quote)); if (escapec != '\0') - match = vector8_or(match, vector8_eq(chunk, escape)); + { + match1 = vector8_or(match1, vector8_eq(chunk1, escape)); + match2 = vector8_or(match2, vector8_eq(chunk2, escape)); + match3 = 
vector8_or(match3, vector8_eq(chunk3, escape)); + match4 = vector8_or(match4, vector8_eq(chunk4, escape)); + } } else { - match = vector8_or(vector8_eq(chunk, nl), vector8_eq(chunk, cr)); - match = vector8_or(match, vector8_eq(chunk, bs)); + match1 = vector8_or(vector8_eq(chunk1, nl), vector8_eq(chunk1, cr)); + match2 = vector8_or(vector8_eq(chunk2, nl), vector8_eq(chunk2, cr)); + match3 = vector8_or(vector8_eq(chunk3, nl), vector8_eq(chunk3, cr)); + match4 = vector8_or(vector8_eq(chunk4, nl), vector8_eq(chunk4, cr)); + + match1 = vector8_or(match1, vector8_eq(chunk1, bs)); + match2 = vector8_or(match2, vector8_eq(chunk2, bs)); + match3 = vector8_or(match3, vector8_eq(chunk3, bs)); + match4 = vector8_or(match4, vector8_eq(chunk4, bs)); } - /* Check if we found any special characters */ - mask = vector8_highbit_mask(match); - if (mask != 0) + /* Combine results to check if any chunk has special characters */ + tmp1 = vector8_or(match1, match2); + tmp2 = vector8_or(match3, match4); + result = vector8_or(tmp1, tmp2); + + if (vector8_is_highbit_set(result)) { /* - * Found a special character. Advance up to that point and let - * the scalar code handle it. + * Found a special character somewhere in the four chunks. + * Identify the first chunk containing it. */ - int advance = pg_rightmost_one_pos32(mask); + uint32 mask; + int advance; char c1, c2; bool simd_hit_eol, simd_hit_eof; + mask = vector8_highbit_mask(match1); + if (mask == 0) + { + input_buf_ptr += sizeof(Vector8); + mask = vector8_highbit_mask(match2); + } + if (mask == 0) + { + input_buf_ptr += sizeof(Vector8); + mask = vector8_highbit_mask(match3); + } + if (mask == 0) + { + input_buf_ptr += sizeof(Vector8); + mask = vector8_highbit_mask(match4); + } + Assert(mask != 0); + + /* + * Found a special character. Advance up to that point and let + * the scalar code handle it. 
+ */ + advance = pg_rightmost_one_pos32(mask); input_buf_ptr += advance; c1 = copy_input_buf[input_buf_ptr]; /* - * Since we stopped within the chunk and ((copy_buf_len - - * input_buf_ptr) > sizeof(Vector8)) is true, - * copy_input_buf[input_buf_ptr + 1] is guaranteed to be - * readable. + * Since we stopped within the block and ((copy_buf_len - + * input_buf_ptr) >= 4 * sizeof(Vector8)) was true at the + * start, copy_input_buf[input_buf_ptr + 1] is guaranteed to + * be readable. */ c2 = copy_input_buf[input_buf_ptr + 1]; simd_hit_eol = (c1 == '\r' || c1 == '\n') && (!is_csv || !in_quote); @@ -1438,8 +1506,8 @@ CopyReadLineText(CopyFromState cstate, bool is_csv, bool simd_enabled) } else { - /* No special characters found, so skip the entire chunk */ - input_buf_ptr += sizeof(Vector8); + /* No special characters found, so skip the entire block */ + input_buf_ptr += 4 * sizeof(Vector8); continue; } } -- 2.47.3