From c8d0153f7e149a0c68a418ab0204d83dcd457673 Mon Sep 17 00:00:00 2001 From: Chengpeng Yan Date: Thu, 9 Apr 2026 09:48:07 +0800 Subject: [PATCH v5 1/2] ANALYZE: use cursor eviction for distinct-value tracking compute_distinct_stats() currently maintains the count=1 region by shifting entries in track[] whenever a new singleton is inserted. That makes each replacement O(n) in the size of the singleton region. Replace that with a round-robin cursor over the count=1 region. This preserves FIFO eviction order for singleton candidates while making singleton replacement O(1) and avoiding the repeated array shifts needed by the old scheme. --- src/backend/commands/analyze.c | 60 ++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 14 deletions(-) diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 020a5919b84..5a45819a114 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -2108,10 +2108,10 @@ compute_trivial_stats(VacAttrStatsP stats, * * The most common values are determined by brute force: we keep a list * of previously seen values, ordered by number of times seen, as we scan - * the samples. A newly seen value is inserted just after the last - * multiply-seen value, causing the bottommost (oldest) singly-seen value - * to drop off the list. The accuracy of this method, and also its cost, - * depend mainly on the length of the list we are willing to keep. + * the samples. Newly seen values are appended to the list, and when it's + * full we replace the oldest singly-seen value (FIFO) using a round-robin + * cursor (clock hand) over the count=1 region. This avoids repeatedly + * shifting the count=1 region. */ static void compute_distinct_stats(VacAttrStatsP stats, @@ -2138,6 +2138,7 @@ compute_distinct_stats(VacAttrStatsP stats, int track_cnt, track_max; int num_mcv = stats->attstattarget; + int c1_cursor = 0; /* next singleton to evict (FIFO) */ StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data; /* @@ -2221,7 +2222,11 @@ compute_distinct_stats(VacAttrStatsP stats, if (match) { + bool was_count1; + int match_index = j; + /* Found a match */ + was_count1 = (track[j].count == 1); track[j].count++; /* This value may now need to "bubble up" in the track list */ while (j > 0 && track[j].count > track[j - 1].count) @@ -2230,22 +2235,49 @@ compute_distinct_stats(VacAttrStatsP stats, swapInt(track[j].count, track[j - 1].count); j--; } + + /* + * When a singleton becomes multiply-seen, it leaves the count=1 + * region and the preceding singletons shift right by one slot. + * If the cursor points into that range, advance it so it still + * marks the oldest remaining singleton. + */ + if (was_count1 && + c1_cursor >= firstcount1 && + c1_cursor <= match_index) + { + c1_cursor++; + if (c1_cursor >= track_cnt) + c1_cursor = firstcount1 + 1; + } } else { - /* No match. Insert at head of count-1 list */ + int insert_index; + + /* + * No match. Track the value if we still have room; otherwise + * evict the oldest singleton from the count=1 region. + */ if (track_cnt < track_max) - track_cnt++; - for (j = track_cnt - 1; j > firstcount1; j--) - { - track[j].value = track[j - 1].value; - track[j].count = track[j - 1].count; - } - if (firstcount1 < track_cnt) + insert_index = track_cnt++; + else if (firstcount1 < track_cnt) { - track[firstcount1].value = value; - track[firstcount1].count = 1; + /* + * Use c1_cursor as a round-robin cursor over the count=1 + * region. Keep it on a current singleton before evicting. + */ + if (c1_cursor < firstcount1 || c1_cursor >= track_cnt) + c1_cursor = firstcount1; + insert_index = c1_cursor++; + if (c1_cursor >= track_cnt) + c1_cursor = firstcount1; } + else + continue; + + track[insert_index].value = value; + track[insert_index].count = 1; } } -- 2.50.1 (Apple Git-155)