diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index f952b3c..b02ac20 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -2575,7 +2575,8 @@ compute_scalar_stats(VacAttrStatsP stats, * the MCV list is not complete, it's generally worth being more * selective, and not just filling it all the way up to the stats * target. So for an incomplete list, we try to take only MCVs that - * are significantly more common than average. + * are significantly more common than average frequency for values which + * will not be represented in the MCV list. */ if (track_cnt == ndistinct && toowide_cnt == 0 && stats->stadistinct > 0 && @@ -2587,6 +2588,7 @@ compute_scalar_stats(VacAttrStatsP stats, else { double ndistinct_table = stats->stadistinct; + double values_cnt_remaining = (double) values_cnt; double avgcount, mincount, maxmincount; @@ -2594,25 +2596,27 @@ compute_scalar_stats(VacAttrStatsP stats, /* Re-extract estimate of # distinct nonnull values in table */ if (ndistinct_table < 0) ndistinct_table = -ndistinct_table * totalrows; - /* estimate # occurrences in sample of a typical nonnull value */ - avgcount = (double) nonnull_cnt / ndistinct_table; - /* set minimum threshold count to store a value */ - mincount = avgcount * 1.25; - if (mincount < 2) - mincount = 2; - /* don't let threshold exceed 1/K, however */ - maxmincount = (double) values_cnt / (double) num_bins; - if (mincount > maxmincount) - mincount = maxmincount; - if (num_mcv > track_cnt) - num_mcv = track_cnt; - for (i = 0; i < num_mcv; i++) - { + /* estimate # of occurrences in sample of a typical nonnull value */ + for (i = 0; i < num_mcv; i++) + { + avgcount = values_cnt_remaining / ndistinct_table; + /* set minimum threshold count to store a value */ + mincount = avgcount * 1.25; + if (mincount < 2) + mincount = 2; + /* don't let threshold exceed 1/K, however */ + maxmincount = (double) values_cnt / (double) (num_bins); + if (mincount > maxmincount) + mincount = maxmincount; + if (num_mcv > track_cnt) + num_mcv = track_cnt; if (track[i].count < mincount) { num_mcv = i; break; } + values_cnt_remaining -= track[i].count; + ndistinct_table--; } }