Re: should check interrupts in BuildRelationExtStatistics ? - Mailing list pgsql-hackers

From Tom Lane
Subject Re: should check interrupts in BuildRelationExtStatistics ?
Date
Msg-id 3081435.1657223451@sss.pgh.pa.us
Whole thread Raw
In response to Re: should check interrupts in BuildRelationExtStatistics ?  (Thomas Munro <thomas.munro@gmail.com>)
Responses Re: should check interrupts in BuildRelationExtStatistics ?
List pgsql-hackers
Thomas Munro <thomas.munro@gmail.com> writes:
> On Wed, Jul 6, 2022 at 11:37 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
>> qsort_interruptible

> +1

So here's a patch that does it that way.  I first meant to put the
new file into src/port/, but after remembering that that directory
has no backend-only functions, I went with src/backend/utils/sort/
instead.

For the moment I contented myself with changing qsort[_arg] calls
that occur during statistics collection.  We have a lot more of course,
but many of them can be expected to not be dealing with much data,
and in some cases we might want some closer analysis to be sure there's
no performance hit.  So I'm inclined to not worry too much about the
remaining qsort calls until somebody complains.

This could be back-patched to v14 without much worry, I should think.

            regards, tom lane

diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 07613c38b5..a7966fff83 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -100,7 +100,7 @@ static VacAttrStats *examine_attribute(Relation onerel, int attnum,
 static int    acquire_sample_rows(Relation onerel, int elevel,
                                 HeapTuple *rows, int targrows,
                                 double *totalrows, double *totaldeadrows);
-static int    compare_rows(const void *a, const void *b);
+static int    compare_rows(const void *a, const void *b, void *arg);
 static int    acquire_inherited_sample_rows(Relation onerel, int elevel,
                                           HeapTuple *rows, int targrows,
                                           double *totalrows, double *totaldeadrows);
@@ -1306,7 +1306,8 @@ acquire_sample_rows(Relation onerel, int elevel,
      * tuples are already sorted.
      */
     if (numrows == targrows)
-        qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
+        qsort_interruptible((void *) rows, numrows, sizeof(HeapTuple),
+                            compare_rows, NULL);

     /*
      * Estimate total numbers of live and dead rows in relation, extrapolating
@@ -1342,10 +1343,10 @@ acquire_sample_rows(Relation onerel, int elevel,
 }

 /*
- * qsort comparator for sorting rows[] array
+ * Comparator for sorting rows[] array
  */
 static int
-compare_rows(const void *a, const void *b)
+compare_rows(const void *a, const void *b, void *arg)
 {
     HeapTuple    ha = *(const HeapTuple *) a;
     HeapTuple    hb = *(const HeapTuple *) b;
@@ -1836,7 +1837,7 @@ static void compute_scalar_stats(VacAttrStatsP stats,
                                  int samplerows,
                                  double totalrows);
 static int    compare_scalars(const void *a, const void *b, void *arg);
-static int    compare_mcvs(const void *a, const void *b);
+static int    compare_mcvs(const void *a, const void *b, void *arg);
 static int    analyze_mcv_list(int *mcv_counts,
                              int num_mcv,
                              double stadistinct,
@@ -2473,8 +2474,8 @@ compute_scalar_stats(VacAttrStatsP stats,
         /* Sort the collected values */
         cxt.ssup = &ssup;
         cxt.tupnoLink = tupnoLink;
-        qsort_arg((void *) values, values_cnt, sizeof(ScalarItem),
-                  compare_scalars, (void *) &cxt);
+        qsort_interruptible((void *) values, values_cnt, sizeof(ScalarItem),
+                            compare_scalars, (void *) &cxt);

         /*
          * Now scan the values in order, find the most common ones, and also
@@ -2718,8 +2719,8 @@ compute_scalar_stats(VacAttrStatsP stats,
                         deltafrac;

             /* Sort the MCV items into position order to speed next loop */
-            qsort((void *) track, num_mcv,
-                  sizeof(ScalarMCVItem), compare_mcvs);
+            qsort_interruptible((void *) track, num_mcv, sizeof(ScalarMCVItem),
+                                compare_mcvs, NULL);

             /*
              * Collapse out the MCV items from the values[] array.
@@ -2882,7 +2883,7 @@ compute_scalar_stats(VacAttrStatsP stats,
 }

 /*
- * qsort_arg comparator for sorting ScalarItems
+ * Comparator for sorting ScalarItems
  *
  * Aside from sorting the items, we update the tupnoLink[] array
  * whenever two ScalarItems are found to contain equal datums.  The array
@@ -2919,10 +2920,10 @@ compare_scalars(const void *a, const void *b, void *arg)
 }

 /*
- * qsort comparator for sorting ScalarMCVItems by position
+ * Comparator for sorting ScalarMCVItems by position
  */
 static int
-compare_mcvs(const void *a, const void *b)
+compare_mcvs(const void *a, const void *b, void *arg)
 {
     int            da = ((const ScalarMCVItem *) a)->first;
     int            db = ((const ScalarMCVItem *) b)->first;
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 7c02fb279f..d2aa8d0ca3 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -1129,8 +1129,8 @@ build_sorted_items(StatsBuildData *data, int *nitems,
     }

     /* do the sort, using the multi-sort */
-    qsort_arg((void *) items, nrows, sizeof(SortItem),
-              multi_sort_compare, mss);
+    qsort_interruptible((void *) items, nrows, sizeof(SortItem),
+                        multi_sort_compare, mss);

     return items;
 }
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index 1ef3034428..f10642df4f 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -404,7 +404,7 @@ count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
  *        order.
  */
 static int
-compare_sort_item_count(const void *a, const void *b)
+compare_sort_item_count(const void *a, const void *b, void *arg)
 {
     SortItem   *ia = (SortItem *) a;
     SortItem   *ib = (SortItem *) b;
@@ -457,8 +457,8 @@ build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,
     Assert(j + 1 == ngroups);

     /* Sort the distinct groups by frequency (in descending order). */
-    pg_qsort((void *) groups, ngroups, sizeof(SortItem),
-             compare_sort_item_count);
+    qsort_interruptible((void *) groups, ngroups, sizeof(SortItem),
+                        compare_sort_item_count, NULL);

     *ndistinct = ngroups;
     return groups;
@@ -528,8 +528,8 @@ build_column_frequencies(SortItem *groups, int ngroups,
         }

         /* sort the values, deduplicate */
-        qsort_arg((void *) result[dim], ngroups, sizeof(SortItem),
-                  sort_item_compare, ssup);
+        qsort_interruptible((void *) result[dim], ngroups, sizeof(SortItem),
+                            sort_item_compare, ssup);

         /*
          * Identify distinct values, compute frequency (there might be
@@ -696,8 +696,8 @@ statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)

         PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);

-        qsort_arg(values[dim], counts[dim], sizeof(Datum),
-                  compare_scalars_simple, &ssup[dim]);
+        qsort_interruptible(values[dim], counts[dim], sizeof(Datum),
+                            compare_scalars_simple, &ssup[dim]);

         /*
          * Walk through the array and eliminate duplicate values, but keep the
diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c
index 6ade5eff78..9b216af517 100644
--- a/src/backend/statistics/mvdistinct.c
+++ b/src/backend/statistics/mvdistinct.c
@@ -489,8 +489,8 @@ ndistinct_for_combination(double totalrows, StatsBuildData *data,
     }

     /* We can sort the array now ... */
-    qsort_arg((void *) items, numrows, sizeof(SortItem),
-              multi_sort_compare, mss);
+    qsort_interruptible((void *) items, numrows, sizeof(SortItem),
+                        multi_sort_compare, mss);

     /* ... and count the number of distinct combinations */

diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c
index 3e1a61b56d..e771a7cd62 100644
--- a/src/backend/tsearch/ts_typanalyze.c
+++ b/src/backend/tsearch/ts_typanalyze.c
@@ -44,8 +44,10 @@ static void prune_lexemes_hashtable(HTAB *lexemes_tab, int b_current);
 static uint32 lexeme_hash(const void *key, Size keysize);
 static int    lexeme_match(const void *key1, const void *key2, Size keysize);
 static int    lexeme_compare(const void *key1, const void *key2);
-static int    trackitem_compare_frequencies_desc(const void *e1, const void *e2);
-static int    trackitem_compare_lexemes(const void *e1, const void *e2);
+static int    trackitem_compare_frequencies_desc(const void *e1, const void *e2,
+                                               void *arg);
+static int    trackitem_compare_lexemes(const void *e1, const void *e2,
+                                      void *arg);


 /*
@@ -347,8 +349,8 @@ compute_tsvector_stats(VacAttrStats *stats,
          */
         if (num_mcelem < track_len)
         {
-            qsort(sort_table, track_len, sizeof(TrackItem *),
-                  trackitem_compare_frequencies_desc);
+            qsort_interruptible(sort_table, track_len, sizeof(TrackItem *),
+                                trackitem_compare_frequencies_desc, NULL);
             /* reset minfreq to the smallest frequency we're keeping */
             minfreq = sort_table[num_mcelem - 1]->frequency;
         }
@@ -376,8 +378,8 @@ compute_tsvector_stats(VacAttrStats *stats,
              * presorted we can employ binary search for that.  See
              * ts_selfuncs.c for a real usage scenario.
              */
-            qsort(sort_table, num_mcelem, sizeof(TrackItem *),
-                  trackitem_compare_lexemes);
+            qsort_interruptible(sort_table, num_mcelem, sizeof(TrackItem *),
+                                trackitem_compare_lexemes, NULL);

             /* Must copy the target values into anl_context */
             old_context = MemoryContextSwitchTo(stats->anl_context);
@@ -510,10 +512,10 @@ lexeme_compare(const void *key1, const void *key2)
 }

 /*
- *    qsort() comparator for sorting TrackItems on frequencies (descending sort)
+ *    Comparator for sorting TrackItems on frequencies (descending sort)
  */
 static int
-trackitem_compare_frequencies_desc(const void *e1, const void *e2)
+trackitem_compare_frequencies_desc(const void *e1, const void *e2, void *arg)
 {
     const TrackItem *const *t1 = (const TrackItem *const *) e1;
     const TrackItem *const *t2 = (const TrackItem *const *) e2;
@@ -522,10 +524,10 @@ trackitem_compare_frequencies_desc(const void *e1, const void *e2)
 }

 /*
- *    qsort() comparator for sorting TrackItems on lexemes
+ *    Comparator for sorting TrackItems on lexemes
  */
 static int
-trackitem_compare_lexemes(const void *e1, const void *e2)
+trackitem_compare_lexemes(const void *e1, const void *e2, void *arg)
 {
     const TrackItem *const *t1 = (const TrackItem *const *) e1;
     const TrackItem *const *t2 = (const TrackItem *const *) e2;
diff --git a/src/backend/utils/adt/array_typanalyze.c b/src/backend/utils/adt/array_typanalyze.c
index ab1d69b2f4..1964cedd93 100644
--- a/src/backend/utils/adt/array_typanalyze.c
+++ b/src/backend/utils/adt/array_typanalyze.c
@@ -86,9 +86,9 @@ static void prune_element_hashtable(HTAB *elements_tab, int b_current);
 static uint32 element_hash(const void *key, Size keysize);
 static int    element_match(const void *key1, const void *key2, Size keysize);
 static int    element_compare(const void *key1, const void *key2);
-static int    trackitem_compare_frequencies_desc(const void *e1, const void *e2);
-static int    trackitem_compare_element(const void *e1, const void *e2);
-static int    countitem_compare_count(const void *e1, const void *e2);
+static int    trackitem_compare_frequencies_desc(const void *e1, const void *e2, void *arg);
+static int    trackitem_compare_element(const void *e1, const void *e2, void *arg);
+static int    countitem_compare_count(const void *e1, const void *e2, void *arg);


 /*
@@ -502,8 +502,8 @@ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
          */
         if (num_mcelem < track_len)
         {
-            qsort(sort_table, track_len, sizeof(TrackItem *),
-                  trackitem_compare_frequencies_desc);
+            qsort_interruptible(sort_table, track_len, sizeof(TrackItem *),
+                                trackitem_compare_frequencies_desc, NULL);
             /* reset minfreq to the smallest frequency we're keeping */
             minfreq = sort_table[num_mcelem - 1]->frequency;
         }
@@ -522,8 +522,8 @@ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
              * the element type's default comparison function.  This permits
              * fast binary searches in selectivity estimation functions.
              */
-            qsort(sort_table, num_mcelem, sizeof(TrackItem *),
-                  trackitem_compare_element);
+            qsort_interruptible(sort_table, num_mcelem, sizeof(TrackItem *),
+                                trackitem_compare_element, NULL);

             /* Must copy the target values into anl_context */
             old_context = MemoryContextSwitchTo(stats->anl_context);
@@ -599,8 +599,9 @@ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
             {
                 sorted_count_items[j++] = count_item;
             }
-            qsort(sorted_count_items, count_items_count,
-                  sizeof(DECountItem *), countitem_compare_count);
+            qsort_interruptible(sorted_count_items, count_items_count,
+                                sizeof(DECountItem *),
+                                countitem_compare_count, NULL);

             /*
              * Prepare to fill stanumbers with the histogram, followed by the
@@ -751,10 +752,10 @@ element_compare(const void *key1, const void *key2)
 }

 /*
- * qsort() comparator for sorting TrackItems by frequencies (descending sort)
+ * Comparator for sorting TrackItems by frequencies (descending sort)
  */
 static int
-trackitem_compare_frequencies_desc(const void *e1, const void *e2)
+trackitem_compare_frequencies_desc(const void *e1, const void *e2, void *arg)
 {
     const TrackItem *const *t1 = (const TrackItem *const *) e1;
     const TrackItem *const *t2 = (const TrackItem *const *) e2;
@@ -763,10 +764,10 @@ trackitem_compare_frequencies_desc(const void *e1, const void *e2)
 }

 /*
- * qsort() comparator for sorting TrackItems by element values
+ * Comparator for sorting TrackItems by element values
  */
 static int
-trackitem_compare_element(const void *e1, const void *e2)
+trackitem_compare_element(const void *e1, const void *e2, void *arg)
 {
     const TrackItem *const *t1 = (const TrackItem *const *) e1;
     const TrackItem *const *t2 = (const TrackItem *const *) e2;
@@ -775,10 +776,10 @@ trackitem_compare_element(const void *e1, const void *e2)
 }

 /*
- * qsort() comparator for sorting DECountItems by count
+ * Comparator for sorting DECountItems by count
  */
 static int
-countitem_compare_count(const void *e1, const void *e2)
+countitem_compare_count(const void *e1, const void *e2, void *arg)
 {
     const DECountItem *const *t1 = (const DECountItem *const *) e1;
     const DECountItem *const *t2 = (const DECountItem *const *) e2;
diff --git a/src/backend/utils/adt/rangetypes_typanalyze.c b/src/backend/utils/adt/rangetypes_typanalyze.c
index 2b77e03449..2043d3f98b 100644
--- a/src/backend/utils/adt/rangetypes_typanalyze.c
+++ b/src/backend/utils/adt/rangetypes_typanalyze.c
@@ -32,7 +32,7 @@
 #include "utils/rangetypes.h"
 #include "utils/multirangetypes.h"

-static int    float8_qsort_cmp(const void *a1, const void *a2);
+static int    float8_qsort_cmp(const void *a1, const void *a2, void *arg);
 static int    range_bound_qsort_cmp(const void *a1, const void *a2, void *arg);
 static void compute_range_stats(VacAttrStats *stats,
                                 AnalyzeAttrFetchFunc fetchfunc, int samplerows,
@@ -93,7 +93,7 @@ multirange_typanalyze(PG_FUNCTION_ARGS)
  * Comparison function for sorting float8s, used for range lengths.
  */
 static int
-float8_qsort_cmp(const void *a1, const void *a2)
+float8_qsort_cmp(const void *a1, const void *a2, void *arg)
 {
     const float8 *f1 = (const float8 *) a1;
     const float8 *f2 = (const float8 *) a2;
@@ -280,10 +280,10 @@ compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
         if (non_empty_cnt >= 2)
         {
             /* Sort bound values */
-            qsort_arg(lowers, non_empty_cnt, sizeof(RangeBound),
-                      range_bound_qsort_cmp, typcache);
-            qsort_arg(uppers, non_empty_cnt, sizeof(RangeBound),
-                      range_bound_qsort_cmp, typcache);
+            qsort_interruptible(lowers, non_empty_cnt, sizeof(RangeBound),
+                                range_bound_qsort_cmp, typcache);
+            qsort_interruptible(uppers, non_empty_cnt, sizeof(RangeBound),
+                                range_bound_qsort_cmp, typcache);

             num_hist = non_empty_cnt;
             if (num_hist > num_bins)
@@ -345,7 +345,8 @@ compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
              * Ascending sort of range lengths for further filling of
              * histogram
              */
-            qsort(lengths, non_empty_cnt, sizeof(float8), float8_qsort_cmp);
+            qsort_interruptible(lengths, non_empty_cnt, sizeof(float8),
+                                float8_qsort_cmp, NULL);

             num_hist = non_empty_cnt;
             if (num_hist > num_bins)
diff --git a/src/backend/utils/sort/Makefile b/src/backend/utils/sort/Makefile
index 26f65fcaf7..2c31fd453d 100644
--- a/src/backend/utils/sort/Makefile
+++ b/src/backend/utils/sort/Makefile
@@ -16,6 +16,7 @@ override CPPFLAGS := -I. -I$(srcdir) $(CPPFLAGS)

 OBJS = \
     logtape.o \
+    qsort_interruptible.o \
     sharedtuplestore.o \
     sortsupport.o \
     tuplesort.o \
diff --git a/src/backend/utils/sort/qsort_interruptible.c b/src/backend/utils/sort/qsort_interruptible.c
new file mode 100644
index 0000000000..f179b25624
--- /dev/null
+++ b/src/backend/utils/sort/qsort_interruptible.c
@@ -0,0 +1,16 @@
+/*
+ *    qsort_interruptible.c: qsort_arg that includes CHECK_FOR_INTERRUPTS
+ */
+
+#include "postgres.h"
+#include "miscadmin.h"
+
+#define ST_SORT qsort_interruptible
+#define ST_ELEMENT_TYPE_VOID
+#define ST_COMPARATOR_TYPE_NAME qsort_arg_comparator
+#define ST_COMPARE_RUNTIME_POINTER
+#define ST_COMPARE_ARG_TYPE void
+#define ST_SCOPE
+#define ST_DEFINE
+#define ST_CHECK_FOR_INTERRUPTS
+#include "lib/sort_template.h"
diff --git a/src/include/port.h b/src/include/port.h
index 12c05b5d9f..e647f62b77 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -499,6 +499,9 @@ typedef int (*qsort_arg_comparator) (const void *a, const void *b, void *arg);
 extern void qsort_arg(void *base, size_t nel, size_t elsize,
                       qsort_arg_comparator cmp, void *arg);

+extern void qsort_interruptible(void *base, size_t nel, size_t elsize,
+                                qsort_arg_comparator cmp, void *arg);
+
 extern void *bsearch_arg(const void *key, const void *base,
                          size_t nmemb, size_t size,
                          int (*compar) (const void *, const void *, void *),

pgsql-hackers by date:

Previous
From: Joe Conway
Date:
Subject: Re: pg_parameter_aclcheck() and trusted extensions
Next
From: Joe Conway
Date:
Subject: Re: Patch proposal: New hooks in the connection path