From c1d3b6fddf5d19c4cdc4b6bb8ad3a94af05c5c54 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Sat, 29 Oct 2022 20:47:31 +0200 Subject: [PATCH 03/11] wip: introduce debug_brin_cross_check --- src/backend/access/brin/brin_minmax.c | 574 ++++++++++++++++++++++++++ src/backend/utils/misc/guc_tables.c | 10 + 2 files changed, 584 insertions(+) diff --git a/src/backend/access/brin/brin_minmax.c b/src/backend/access/brin/brin_minmax.c index 2a4ae5a028e..2f67334543d 100644 --- a/src/backend/access/brin/brin_minmax.c +++ b/src/backend/access/brin/brin_minmax.c @@ -30,6 +30,7 @@ #ifdef DEBUG_BRIN_STATS bool debug_brin_stats = false; +bool debug_brin_cross_check = false; #endif typedef struct MinmaxOpaque @@ -340,6 +341,50 @@ range_values_cmp(const void *a, const void *b, void *arg) return DatumGetInt32(c); } +#ifdef DEBUG_BRIN_STATS +/* + * maxval_start + * Determine first index so that (maxvalue >= value). + * + * The array of ranges is expected to be sorted by maxvalue, so this is the first + * range that can possibly intersect with range having "value" as minval. + */ +static int +maxval_start(BrinRange **ranges, int nranges, Datum value, TypeCacheEntry *typcache) +{ + int start = 0, + end = (nranges - 1); + + // everything matches + if (range_values_cmp(&value, &ranges[start]->max_value, typcache) <= 0) + return 0; + + // no matches + if (range_values_cmp(&value, &ranges[end]->max_value, typcache) > 0) + return nranges; + + while ((end - start) > 0) + { + int midpoint; + int r; + + midpoint = start + (end - start) / 2; + + r = range_values_cmp(&value, &ranges[midpoint]->max_value, typcache); + + if (r <= 0) + end = midpoint; + else + start = (midpoint + 1); + } + + Assert(ranges[start]->max_value >= value); + Assert(ranges[start-1]->max_value < value); + + return start; +} +#endif + /* * minval_end * Determine first index so that (minval > value). @@ -632,6 +677,316 @@ brin_minmax_match_tuples_to_ranges(BrinRanges *ranges, stats->avg_matches_unique = (double) nmatches_unique / nvalues_unique; } +#ifdef DEBUG_BRIN_STATS +/* + * Simple histogram, with bins tracking value and two overlap counts. + * + * XXX Maybe we should have two separate histograms, one for all counts and + * another one for "unique" values. + * + * XXX Serialize the histogram. There might be a data set where we have very + * many distinct buckets (values having very different number of matching + * ranges) - not sure if there's some sort of upper limit (but hard to say for + * other opclasses, like bloom). And we don't want arbitrarily large histogram, + * to keep the statistics fairly small, I guess. So we'd need to pick a subset, + * merge buckets with "similar" counts, or approximate it somehow. For now we + * don't serialize it, because we don't use the histogram. + */ +typedef struct histogram_bin_t +{ + int64 value; + int64 count; +} histogram_bin_t; + +typedef struct histogram_t +{ + int nbins; + int nbins_max; + histogram_bin_t bins[FLEXIBLE_ARRAY_MEMBER]; +} histogram_t; + +#define HISTOGRAM_BINS_START 32 + +/* allocate histogram with default number of bins */ +static histogram_t * +histogram_init(void) +{ + histogram_t *hist; + + hist = (histogram_t *) palloc0(offsetof(histogram_t, bins) + + sizeof(histogram_bin_t) * HISTOGRAM_BINS_START); + hist->nbins_max = HISTOGRAM_BINS_START; + + return hist; +} + +/* + * histogram_add + * Add a hit for a particular value to the histogram. + * + * XXX We don't sort the bins, so just do binary sort. For large number of values + * this might be an issue, for small number of values a linear search is fine. + */ +static histogram_t * +histogram_add(histogram_t *hist, int value) +{ + bool found = false; + histogram_bin_t *bin; + + for (int i = 0; i < hist->nbins; i++) + { + if (hist->bins[i].value == value) + { + bin = &hist->bins[i]; + found = true; + } + } + + if (!found) + { + if (hist->nbins == hist->nbins_max) + { + int nbins = (2 * hist->nbins_max); + + hist = repalloc(hist, offsetof(histogram_t, bins) + + sizeof(histogram_bin_t) * nbins); + hist->nbins_max = nbins; + } + + Assert(hist->nbins < hist->nbins_max); + + bin = &hist->bins[hist->nbins++]; + bin->value = value; + bin->count = 0; + } + + bin->count += 1; + + Assert(bin->value == value); + Assert(bin->count >= 0); + + return hist; +} + +/* used to sort histogram bins by value */ +static int +histogram_bin_cmp(const void *a, const void *b) +{ + histogram_bin_t *ba = (histogram_bin_t *) a; + histogram_bin_t *bb = (histogram_bin_t *) b; + + if (ba->value < bb->value) + return -1; + + if (bb->value < ba->value) + return 1; + + return 0; +} + +static void +histogram_print(histogram_t *hist) +{ + return; + + elog(WARNING, "----- histogram -----"); + for (int i = 0; i < hist->nbins; i++) + { + elog(WARNING, "bin %d value %ld count %ld", + i, hist->bins[i].value, hist->bins[i].count); + } +} + +/* + * brin_minmax_match_tuples_to_ranges2 + * Match tuples to ranges, count average number of ranges per tuple. + * + * Match sample tuples to the ranges, so that we can count how many ranges + * a value matches on average. This might seem redundant to the number of + * overlaps, because the value is ~avg_overlaps/2. + * + * Imagine ranges arranged in "shifted" uniformly by 1/overlaps, e.g. with 3 + * overlaps [0,100], [33,133], [66, 166] and so on. A random value will hit + * only half of there ranges, thus 1/2. This can be extended to randomly + * overlapping ranges. + * + * However, we may not be able to count overlaps for some opclasses (e.g. for + * bloom ranges), in which case we have at least this. + * + * This simply walks the values, and determines matching ranges by looking + * for lower/upper bound in ranges ordered by minval/maxval. + * + * XXX The other question is what to do about duplicate values. If we have a + * very frequent value in the sample, it's likely in many places/ranges. Which + * will skew the average, because it'll be added repeatedly. So we also count + * avg_ranges for unique values. + * + * XXX The relationship that (average_matches ~ average_overlaps/2) only + * works for minmax opclass, and can't be extended to minmax-multi. The + * overlaps can only consider the two extreme values (essentially treating + * the summary as a single minmax range), because that's what brinsort + * needs. But the minmax-multi range may have "gaps" (kinda the whole point + * of these opclasses), which affects matching tuples to ranges. + * + * XXX This also builds histograms of the number of matches, both for the + * raw and unique values. At the moment we don't do anything with the + * results, though (except for printing those). + */ +static void +brin_minmax_match_tuples_to_ranges2(BrinRanges *ranges, + BrinRange **minranges, BrinRange **maxranges, + int numrows, HeapTuple *rows, + int nvalues, Datum *values, + TypeCacheEntry *typcache, + BrinMinmaxStats *stats) +{ + int64 nmatches = 0; + int64 nmatches_unique = 0; + int64 nmatches_value = 0; + int64 nvalues_unique = 0; + + histogram_t *hist = histogram_init(); + histogram_t *hist_unique = histogram_init(); + TimestampTz start_ts = GetCurrentTimestamp(); + + for (int i = 0; i < nvalues; i++) + { + int start; + int end; + + CHECK_FOR_INTERRUPTS(); + + /* + * Same value as preceding, so just use the preceding count. + * We don't increment the unique counters, because this is + * a duplicate. + */ + if ((i > 0) && (range_values_cmp(&values[i-1], &values[i], typcache) == 0)) + { + nmatches += nmatches_value; + hist = histogram_add(hist, nmatches_value); + continue; + } + + nmatches_value = 0; + + start = maxval_start(maxranges, ranges->nranges, values[i], typcache); + end = minval_end(minranges, ranges->nranges, values[i], typcache); + + for (int j = start; j < ranges->nranges; j++) + { + if (maxranges[j]->min_index >= end) + continue; + + if (maxranges[j]->min_index_lowest >= end) + break; + + nmatches_value++; + } + + hist = histogram_add(hist, nmatches_value); + hist_unique = histogram_add(hist_unique, nmatches_value); + + nmatches += nmatches_value; + nmatches_unique += nmatches_value; + nvalues_unique++; + } + + if (debug_brin_stats) + { + elog(WARNING, "----- brin_minmax_match_tuples_to_ranges2 -----"); + elog(WARNING, "nmatches = %ld %f", nmatches, (double) nmatches / numrows); + elog(WARNING, "nmatches unique = %ld %ld %f", + nmatches_unique, nvalues_unique, (double) nmatches_unique / nvalues_unique); + elog(WARNING, "duration = %ld", TimestampDifferenceMilliseconds(start_ts, + GetCurrentTimestamp())); + } + + if (stats->avg_matches != (double) nmatches / numrows) + elog(ERROR, "brin_minmax_match_tuples_to_ranges2: avg_matches mismatch %f != %f", + stats->avg_matches, (double) nmatches / numrows); + + if (stats->avg_matches_unique != (double) nmatches_unique / nvalues_unique) + elog(ERROR, "brin_minmax_match_tuples_to_ranges2: avg_matches_unique mismatch %f != %f", + stats->avg_matches_unique, (double) nmatches_unique / nvalues_unique); + + pg_qsort(hist->bins, hist->nbins, sizeof(histogram_bin_t), histogram_bin_cmp); + pg_qsort(hist_unique->bins, hist_unique->nbins, sizeof(histogram_bin_t), histogram_bin_cmp); + + histogram_print(hist); + histogram_print(hist_unique); + + pfree(hist); + pfree(hist_unique); +} + +/* + * brin_minmax_match_tuples_to_ranges_bruteforce + * Match tuples to ranges, count average number of ranges per tuple. + * + * Bruteforce approach, used mostly for cross-checking. + */ +static void +brin_minmax_match_tuples_to_ranges_bruteforce(BrinRanges *ranges, + int numrows, HeapTuple *rows, + int nvalues, Datum *values, + TypeCacheEntry *typcache, + BrinMinmaxStats *stats) +{ + int64 nmatches = 0; + int64 nmatches_unique = 0; + int64 nvalues_unique = 0; + + TimestampTz start_ts = GetCurrentTimestamp(); + + for (int i = 0; i < nvalues; i++) + { + bool is_unique; + int64 nmatches_value = 0; + + CHECK_FOR_INTERRUPTS(); + + /* is this a new value? */ + is_unique = ((i == 0) || (range_values_cmp(&values[i-1], &values[i], typcache) != 0)); + + /* count unique values */ + nvalues_unique += (is_unique) ? 1 : 0; + + for (int j = 0; j < ranges->nranges; j++) + { + if (range_values_cmp(&values[i], &ranges->ranges[j].min_value, typcache) < 0) + continue; + + if (range_values_cmp(&values[i], &ranges->ranges[j].max_value, typcache) > 0) + continue; + + nmatches_value++; + } + + nmatches += nmatches_value; + nmatches_unique += (is_unique) ? nmatches_value : 0; + } + + if (debug_brin_stats) + { + elog(WARNING, "----- brin_minmax_match_tuples_to_ranges_bruteforce -----"); + elog(WARNING, "nmatches = %ld %f", nmatches, (double) nmatches / numrows); + elog(WARNING, "nmatches unique = %ld %ld %f", nmatches_unique, nvalues_unique, + (double) nmatches_unique / nvalues_unique); + elog(WARNING, "duration = %ld", TimestampDifferenceMilliseconds(start_ts, + GetCurrentTimestamp())); + } + + if (stats->avg_matches != (double) nmatches / numrows) + elog(ERROR, "brin_minmax_match_tuples_to_ranges_bruteforce: avg_matches mismatch %f != %f", + stats->avg_matches, (double) nmatches / numrows); + + if (stats->avg_matches_unique != (double) nmatches_unique / nvalues_unique) + elog(ERROR, "brin_minmax_match_tuples_to_ranges_bruteforce: avg_matches_unique mismatch %f != %f", + stats->avg_matches_unique, (double) nmatches_unique / nvalues_unique); +} +#endif + /* * brin_minmax_value_stats * Calculate statistics about minval/maxval values. @@ -803,6 +1158,198 @@ brin_minmax_increment_stats(BrinRange **minranges, BrinRange **maxranges, stats->maxval_increment_max = max_maxval; } +#ifdef DEBUG_BRIN_STATS +/* + * brin_minmax_count_overlaps2 + * Calculate number of overlaps. + * + * This uses the minranges/maxranges to quickly eliminate ranges that can't + * possibly intersect. + * + * XXX Seems rather complicated and works poorly for wide ranges (with outlier + * values), brin_minmax_count_overlaps is likely better. + */ +static void +brin_minmax_count_overlaps2(BrinRanges *ranges, + BrinRange **minranges, BrinRange **maxranges, + TypeCacheEntry *typcache, BrinMinmaxStats *stats) +{ + int64 noverlaps; + TimestampTz start_ts = GetCurrentTimestamp(); + + /* + * Walk the ranges ordered by max_values, see how many ranges overlap. + * + * Once we get to a state where (min_value > current.max_value) for + * all future ranges, we know none of them can overlap and we can + * terminate. This is what min_index_lowest is for. + * + * XXX If there are very wide ranges (with outlier min/max values), + * the min_index_lowest is going to be pretty useless, because the + * range will be sorted at the very end by max_value, but will have + * very low min_index, so this won't work. + * + * XXX We could collect a more elaborate stuff, like for example a + * histogram of number of overlaps, or maximum number of overlaps. + * So we'd have average, but then also an info if there are some + * ranges with very many overlaps. + */ + noverlaps = 0; + for (int i = 0; i < ranges->nranges; i++) + { + int idx = (i + 1); + BrinRange *ra = maxranges[i]; + uint64 min_index = ra->min_index; + + CHECK_FOR_INTERRUPTS(); + +#ifdef NOT_USED + /* + * XXX Not needed, we can just count "future" ranges and then + * we just multiply by 2. + */ + + /* + * What's the first range that might overlap with this one? + * needs to have maxval > current.minval. + */ + while (idx > 0) + { + BrinRange *rb = maxranges[idx - 1]; + + /* the range is before the current one, so can't intersect */ + if (range_values_cmp(&rb->max_value, &ra->min_value, typcache) < 0) + break; + + idx--; + } +#endif + + /* + * Find the first min_index that is higher than the max_value, + * so that we can compare that instead of the values in the + * next loop. There should be fewer value comparisons than in + * the next loop, so we'll save on function calls. + */ + while (min_index < ranges->nranges) + { + if (range_values_cmp(&minranges[min_index]->min_value, + &ra->max_value, typcache) > 0) + break; + + min_index++; + } + + /* + * Walk the following ranges (ordered by max_value), and check + * if it overlaps. If it matches, we look at the next one. If + * not, we check if there can be more ranges. + */ + for (int j = idx; j < ranges->nranges; j++) + { + BrinRange *rb = maxranges[j]; + + /* the range overlaps - just continue with the next one */ + // if (range_values_cmp(&rb->min_value, &ra->max_value, typcache) <= 0) + if (rb->min_index < min_index) + { + noverlaps++; + continue; + } + + /* + * Are there any future ranges that might overlap? We can + * check the min_index_lowest to decide quickly. + */ + if (rb->min_index_lowest >= min_index) + break; + } + } + + /* + * We only count intersect for "following" ranges when ordered by maxval, + * so we only see 1/2 the overlaps. So double the result. + */ + noverlaps *= 2; + + if (debug_brin_stats) + { + elog(WARNING, "----- brin_minmax_count_overlaps2 -----"); + elog(WARNING, "noverlaps = %ld", noverlaps); + elog(WARNING, "duration = %ld", TimestampDifferenceMilliseconds(start_ts, + GetCurrentTimestamp())); + } + + if (stats->avg_overlaps != (double) noverlaps / ranges->nranges) + elog(ERROR, "brin_minmax_count_overlaps2: mismatch %f != %f", + stats->avg_overlaps, (double) noverlaps / ranges->nranges); +} + +/* + * brin_minmax_count_overlaps_bruteforce + * Calculate number of overlaps by brute force. + * + * Actually compares every range to every other range. Quite expensive, used + * primarily to cross-check the other algorithms. + */ +static void +brin_minmax_count_overlaps_bruteforce(BrinRanges *ranges, + TypeCacheEntry *typcache, + BrinMinmaxStats *stats) +{ + int64 noverlaps; + TimestampTz start_ts = GetCurrentTimestamp(); + + /* + * Brute force calculation of overlapping ranges, comparing each + * range to every other range - bound to be pretty expensive, as + * it's pretty much O(N^2). Kept mostly for easy cross-check with + * the preceding "optimized" code. + */ + noverlaps = 0; + for (int i = 0; i < ranges->nranges; i++) + { + BrinRange *ra = &ranges->ranges[i]; + + for (int j = 0; j < ranges->nranges; j++) + { + BrinRange *rb = &ranges->ranges[j]; + + CHECK_FOR_INTERRUPTS(); + + if (i == j) + continue; + + if (range_values_cmp(&ra->max_value, &rb->min_value, typcache) < 0) + continue; + + if (range_values_cmp(&rb->max_value, &ra->min_value, typcache) < 0) + continue; + +#if 0 + elog(DEBUG1, "[%ld,%ld] overlaps [%ld,%ld]", + ra->min_value, ra->max_value, + rb->min_value, rb->max_value); +#endif + + noverlaps++; + } + } + + if (debug_brin_stats) + { + elog(WARNING, "----- brin_minmax_count_overlaps_bruteforce -----"); + elog(WARNING, "noverlaps = %ld", noverlaps); + elog(WARNING, "duration = %ld", TimestampDifferenceMilliseconds(start_ts, + GetCurrentTimestamp())); + } + + if (stats->avg_overlaps != (double) noverlaps / ranges->nranges) + elog(ERROR, "brin_minmax_count_overlaps2: mismatch %f != %f", + stats->avg_overlaps, (double) noverlaps / ranges->nranges); +} +#endif + /* * brin_minmax_stats * Calculate custom statistics for a BRIN minmax index. @@ -814,6 +1361,11 @@ brin_minmax_increment_stats(BrinRange **minranges, BrinRange **maxranges, * - average number of rows matching a range * - number of distinct minval/maxval values * + * There are multiple ways to calculate some of the metrics, so to allow + * cross-checking during development it's possible to run and compare all. + * To do that, define STATS_CROSS_CHECK. There's also STATS_DEBUG define + * that simply prints the calculated results. + * * XXX This could also calculate correlation of the range minval, so that * we can estimate how much random I/O will happen during the BrinSort. * And perhaps we should also sort the ranges by (minval,block_start) to @@ -1141,6 +1693,14 @@ brin_minmax_stats(PG_FUNCTION_ARGS) /* calculate average number of overlapping ranges for any range */ brin_minmax_count_overlaps(minranges, ranges->nranges, typcache, stats); +#ifdef DEBUG_BRIN_STATS + if (debug_brin_cross_check) + { + brin_minmax_count_overlaps2(ranges, minranges, maxranges, typcache, stats); + brin_minmax_count_overlaps_bruteforce(ranges, typcache, stats); + } +#endif + /* calculate minval/maxval stats (distinct values and correlation) */ brin_minmax_value_stats(minranges, maxranges, ranges->nranges, typcache, stats); @@ -1206,6 +1766,20 @@ brin_minmax_stats(PG_FUNCTION_ARGS) numrows, rows, nvalues, values, typcache, stats); +#ifdef DEBUG_BRIN_STATS + if (debug_brin_cross_check) + { + brin_minmax_match_tuples_to_ranges2(ranges, minranges, maxranges, + numrows, rows, nvalues, values, + typcache, stats); + + brin_minmax_match_tuples_to_ranges_bruteforce(ranges, + numrows, rows, + nvalues, values, + typcache, stats); + } +#endif + brin_minmax_increment_stats(minranges, maxranges, ranges->nranges, values, nvalues, typcache, stats); } diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 6abefe24be3..9748a3bfcc5 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -99,6 +99,7 @@ extern bool synchronize_seqscans; #ifdef DEBUG_BRIN_STATS extern bool debug_brin_stats; +extern bool debug_brin_cross_check; #endif #ifdef TRACE_SYNCSCAN @@ -1247,6 +1248,15 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { + {"debug_brin_cross_check", PGC_USERSET, DEVELOPER_OPTIONS, + gettext_noop("Cross-check calculation of BRIN statistics."), + NULL + }, + &debug_brin_cross_check, + false, + NULL, NULL, NULL + }, #endif { -- 2.39.2