From df78d0bad95eb1572826a61bde6f4d6a7e1b639d Mon Sep 17 00:00:00 2001 From: Evdokimov Ilia Date: Sun, 3 May 2026 16:49:11 +0300 Subject: [PATCH v6 1/2] ANALYZE: use cursor eviction for distinct-value tracking compute_distinct_stats() currently maintains the count=1 region by shifting entries in track[] whenever a new singleton is inserted. That makes each replacement O(n) in the size of the singleton region. Replace that with a round-robin cursor over the count=1 region. This preserves FIFO eviction order for singleton candidates while making singleton replacement O(1) and avoiding the repeated array shifts needed by the old scheme. --- src/backend/commands/analyze.c | 79 ++++++++++++++++++++++++++-------- 1 file changed, 60 insertions(+), 19 deletions(-) diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 020a5919b84..262adf41b24 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -56,7 +56,6 @@ #include "utils/syscache.h" #include "utils/timestamp.h" - /* Per-index data for ANALYZE */ typedef struct AnlIndexData { @@ -2108,10 +2107,11 @@ compute_trivial_stats(VacAttrStatsP stats, * * The most common values are determined by brute force: we keep a list * of previously seen values, ordered by number of times seen, as we scan - * the samples. A newly seen value is inserted just after the last - * multiply-seen value, causing the bottommost (oldest) singly-seen value - * to drop off the list. The accuracy of this method, and also its cost, - * depend mainly on the length of the list we are willing to keep. + * the samples. Newly seen values are appended to the list, and when it's + * full we replace the oldest singly-seen value (FIFO) using a round-robin + * cursor (clock hand) over the count=1 region. This avoids repeatedly + * shifting the count=1 region, and when hashing is enabled, avoids having + * to update a large number of hash->index mappings. */ static void compute_distinct_stats(VacAttrStatsP stats, @@ -2138,6 +2138,8 @@ compute_distinct_stats(VacAttrStatsP stats, int track_cnt, track_max; int num_mcv = stats->attstattarget; + int firstcount1 = 0, /* index of first singleton in track[] */ + c1_cursor = 0; /* next singleton to evict (FIFO) */ StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data; /* @@ -2156,7 +2158,7 @@ compute_distinct_stats(VacAttrStatsP stats, Datum value; bool isnull; bool match; - int firstcount1, + int match_index, j; vacuum_delay_point(true); @@ -2213,6 +2215,7 @@ compute_distinct_stats(VacAttrStatsP stats, value, track[j].value))) { match = true; + match_index = j; break; } if (j < firstcount1 && track[j].count == 1) @@ -2221,31 +2224,69 @@ compute_distinct_stats(VacAttrStatsP stats, if (match) { + bool was_count1; + /* Found a match */ - track[j].count++; + was_count1 = (track[match_index].count == 1); + track[match_index].count++; /* This value may now need to "bubble up" in the track list */ - while (j > 0 && track[j].count > track[j - 1].count) + for (j = match_index; j > 0 && track[j].count > track[j - 1].count; j--) { swapDatum(track[j].value, track[j - 1].value); swapInt(track[j].count, track[j - 1].count); - j--; } + + /* + * If a singleton was promoted, the singleton region shrinks by + * one element, so advance its boundary. + */ + if (was_count1) + { + firstcount1++; + + /* + * If the cursor was pointing into the singleton region, + * advance it so it still refers to a valid singleton. + */ + if (c1_cursor >= firstcount1 - 1) + { + c1_cursor++; + if (c1_cursor >= track_cnt) + c1_cursor = firstcount1; + } + } + + /* Final safety: keep cursor inside singleton region */ + if (c1_cursor < firstcount1 || c1_cursor >= track_cnt) + c1_cursor = firstcount1; } else { - /* No match. Insert at head of count-1 list */ + int insert_index; + + /* + * No match. Track the value if we still have room; otherwise + * evict the oldest singleton from the count=1 region. + */ if (track_cnt < track_max) - track_cnt++; - for (j = track_cnt - 1; j > firstcount1; j--) - { - track[j].value = track[j - 1].value; - track[j].count = track[j - 1].count; - } - if (firstcount1 < track_cnt) + insert_index = track_cnt++; + else if (firstcount1 < track_cnt) { - track[firstcount1].value = value; - track[firstcount1].count = 1; + /* + * Use c1_cursor as a round-robin cursor over the count=1 + * region. Keep it on a current singleton before evicting. + */ + if (c1_cursor < firstcount1 || c1_cursor >= track_cnt) + c1_cursor = firstcount1; + insert_index = c1_cursor++; + if (c1_cursor >= track_cnt) + c1_cursor = firstcount1; } + else + continue; + + track[insert_index].value = value; + track[insert_index].count = 1; } } -- 2.34.1