Re: POC: converting Lists into arrays - Mailing list pgsql-hackers

From Tom Lane
Subject Re: POC: converting Lists into arrays
Date
Msg-id 29361.1563220190@sss.pgh.pa.us
Whole thread Raw
In response to Re: POC: converting Lists into arrays  (Tom Lane <tgl@sss.pgh.pa.us>)
Responses Re: POC: converting Lists into arrays  (Tom Lane <tgl@sss.pgh.pa.us>)
Re: POC: converting Lists into arrays  (David Rowley <david.rowley@2ndquadrant.com>)
List pgsql-hackers
I wrote:
> BTW, further on the subject of performance --- I'm aware of at least
> these topics for follow-on patches:
> ...
> * Adjust API for list_qsort(), as discussed, to save indirections and
> avoid constructing an intermediate pointer array.  I also seem to recall
> one place in the planner that's avoiding using list_qsort by manually
> flattening a list into an array, qsort'ing, and rebuilding the list :-(

Here's a proposed patch for that.  There are only two existing calls
of list_qsort(), it turns out.  I didn't find the described spot in
the planner (I believe I was thinking of choose_bitmap_and(), but its
usage isn't quite as described).  However, I found about four other
places that were doing pretty much exactly that, so the attached
also simplifies those places to use list_qsort().

(There are some other places that could perhaps be changed also,
but it would require more invasive hacking than I wanted to do here,
with less-clear benefits.)

A possibly controversial point is that I made list_qsort() sort the
given list in-place, rather than building a new list as it has
historically.  For every single one of the existing and new callers,
copying the input list is wasteful, because they were just leaking
the input list anyway.  But perhaps somebody feels that we should
preserve the "const input" property?  I thought that changing the
function to return void, as done here, might be a good idea to
ensure that callers notice its API changed --- otherwise they'd
only get a warning about incompatible signature of the passed
function pointer, which they might not notice; in fact I'm not
totally sure all compilers would even give such a warning.

If there's not complaints about that, I'm just going to go ahead
and push this --- it seems simple enough to not need much review.

            regards, tom lane

diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c
index ecc2911..8a7eb6a 100644
--- a/src/backend/nodes/list.c
+++ b/src/backend/nodes/list.c
@@ -1421,43 +1421,22 @@ list_copy_deep(const List *oldlist)
 /*
  * Sort a list as though by qsort.
  *
- * A new list is built and returned.  Like list_copy, this doesn't make
- * fresh copies of any pointed-to data.
+ * The list is sorted in-place (this is a change from pre-v13 Postgres).
  *
- * The comparator function receives arguments of type ListCell **.
- * (XXX that's really inefficient now, but changing it seems like
- * material for a standalone patch.)
+ * The comparator function is declared to receive arguments of type
+ * const ListCell *; this allows it to use lfirst() and variants
+ * without casting its arguments.
  */
-List *
-list_qsort(const List *list, list_qsort_comparator cmp)
+void
+list_qsort(List *list, list_qsort_comparator cmp)
 {
-    int            len = list_length(list);
-    ListCell  **list_arr;
-    List       *newlist;
-    ListCell   *cell;
-    int            i;
-
-    /* Empty list is easy */
-    if (len == 0)
-        return NIL;
-
-    /* We have to make an array of pointers to satisfy the API */
-    list_arr = (ListCell **) palloc(sizeof(ListCell *) * len);
-    i = 0;
-    foreach(cell, list)
-        list_arr[i++] = cell;
-
-    qsort(list_arr, len, sizeof(ListCell *), cmp);
-
-    /* Construct new list (this code is much like list_copy) */
-    newlist = new_list(list->type, len);
-
-    for (i = 0; i < len; i++)
-        newlist->elements[i] = *list_arr[i];
+    typedef int (*qsort_comparator) (const void *a, const void *b);
+    int            len;

-    /* Might as well free the workspace array */
-    pfree(list_arr);
+    check_list_invariants(list);

-    check_list_invariants(newlist);
-    return newlist;
+    /* Nothing to do if there's less than two elements */
+    len = list_length(list);
+    if (len > 1)
+        qsort(list->elements, len, sizeof(ListCell), (qsort_comparator) cmp);
 }
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 67254c2..286d8de 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -52,8 +52,8 @@ typedef enum
 #define STD_FUZZ_FACTOR 1.01

 static List *translate_sub_tlist(List *tlist, int relid);
-static int    append_total_cost_compare(const void *a, const void *b);
-static int    append_startup_cost_compare(const void *a, const void *b);
+static int    append_total_cost_compare(const ListCell *a, const ListCell *b);
+static int    append_startup_cost_compare(const ListCell *a, const ListCell *b);
 static List *reparameterize_pathlist_by_child(PlannerInfo *root,
                                               List *pathlist,
                                               RelOptInfo *child_rel);
@@ -1239,9 +1239,8 @@ create_append_path(PlannerInfo *root,
          */
         Assert(pathkeys == NIL);

-        subpaths = list_qsort(subpaths, append_total_cost_compare);
-        partial_subpaths = list_qsort(partial_subpaths,
-                                      append_startup_cost_compare);
+        list_qsort(subpaths, append_total_cost_compare);
+        list_qsort(partial_subpaths, append_startup_cost_compare);
     }
     pathnode->first_partial_path = list_length(subpaths);
     pathnode->subpaths = list_concat(subpaths, partial_subpaths);
@@ -1303,10 +1302,10 @@ create_append_path(PlannerInfo *root,
  * (This is to avoid getting unpredictable results from qsort.)
  */
 static int
-append_total_cost_compare(const void *a, const void *b)
+append_total_cost_compare(const ListCell *a, const ListCell *b)
 {
-    Path       *path1 = (Path *) lfirst(*(ListCell **) a);
-    Path       *path2 = (Path *) lfirst(*(ListCell **) b);
+    Path       *path1 = (Path *) lfirst(a);
+    Path       *path2 = (Path *) lfirst(b);
     int            cmp;

     cmp = compare_path_costs(path1, path2, TOTAL_COST);
@@ -1324,10 +1323,10 @@ append_total_cost_compare(const void *a, const void *b)
  * (This is to avoid getting unpredictable results from qsort.)
  */
 static int
-append_startup_cost_compare(const void *a, const void *b)
+append_startup_cost_compare(const ListCell *a, const ListCell *b)
 {
-    Path       *path1 = (Path *) lfirst(*(ListCell **) a);
-    Path       *path2 = (Path *) lfirst(*(ListCell **) b);
+    Path       *path1 = (Path *) lfirst(a);
+    Path       *path2 = (Path *) lfirst(b);
     int            cmp;

     cmp = compare_path_costs(path1, path2, STARTUP_COST);
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index 8dc3793..7f65b09 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -1722,11 +1722,12 @@ expand_groupingset_node(GroupingSet *gs)
     return result;
 }

+/* list_qsort comparator to sort sub-lists by length */
 static int
-cmp_list_len_asc(const void *a, const void *b)
+cmp_list_len_asc(const ListCell *a, const ListCell *b)
 {
-    int            la = list_length(*(List *const *) a);
-    int            lb = list_length(*(List *const *) b);
+    int            la = list_length((const List *) lfirst(a));
+    int            lb = list_length((const List *) lfirst(b));

     return (la > lb) ? 1 : (la == lb) ? 0 : -1;
 }
@@ -1797,27 +1798,8 @@ expand_grouping_sets(List *groupingSets, int limit)
         result = new_result;
     }

-    if (list_length(result) > 1)
-    {
-        int            result_len = list_length(result);
-        List      **buf = palloc(sizeof(List *) * result_len);
-        List      **ptr = buf;
-
-        foreach(lc, result)
-        {
-            *ptr++ = lfirst(lc);
-        }
-
-        qsort(buf, result_len, sizeof(List *), cmp_list_len_asc);
-
-        result = NIL;
-        ptr = buf;
-
-        while (result_len-- > 0)
-            result = lappend(result, *ptr++);
-
-        pfree(buf);
-    }
+    /* Now sort the lists by length */
+    list_qsort(result, cmp_list_len_asc);

     return result;
 }
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index d8c370e..2924ec0 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -70,7 +70,7 @@ static void base_backup_cleanup(int code, Datum arg);
 static void perform_base_backup(basebackup_options *opt);
 static void parse_basebackup_options(List *options, basebackup_options *opt);
 static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
-static int    compareWalFileNames(const void *a, const void *b);
+static int    compareWalFileNames(const ListCell *a, const ListCell *b);
 static void throttle(size_t increment);
 static bool is_checksummed_file(const char *fullpath, const char *filename);

@@ -379,13 +379,10 @@ perform_base_backup(basebackup_options *opt)
         struct stat statbuf;
         List       *historyFileList = NIL;
         List       *walFileList = NIL;
-        char      **walFiles;
-        int            nWalFiles;
         char        firstoff[MAXFNAMELEN];
         char        lastoff[MAXFNAMELEN];
         DIR           *dir;
         struct dirent *de;
-        int            i;
         ListCell   *lc;
         TimeLineID    tli;

@@ -428,24 +425,17 @@ perform_base_backup(basebackup_options *opt)
         CheckXLogRemoved(startsegno, ThisTimeLineID);

         /*
-         * Put the WAL filenames into an array, and sort. We send the files in
-         * order from oldest to newest, to reduce the chance that a file is
-         * recycled before we get a chance to send it over.
+         * Sort the WAL filenames.  We want to send the files in order from
+         * oldest to newest, to reduce the chance that a file is recycled
+         * before we get a chance to send it over.
          */
-        nWalFiles = list_length(walFileList);
-        walFiles = palloc(nWalFiles * sizeof(char *));
-        i = 0;
-        foreach(lc, walFileList)
-        {
-            walFiles[i++] = lfirst(lc);
-        }
-        qsort(walFiles, nWalFiles, sizeof(char *), compareWalFileNames);
+        list_qsort(walFileList, compareWalFileNames);

         /*
          * There must be at least one xlog file in the pg_wal directory, since
          * we are doing backup-including-xlog.
          */
-        if (nWalFiles < 1)
+        if (walFileList == NIL)
             ereport(ERROR,
                     (errmsg("could not find any WAL files")));

@@ -453,7 +443,8 @@ perform_base_backup(basebackup_options *opt)
          * Sanity check: the first and last segment should cover startptr and
          * endptr, with no gaps in between.
          */
-        XLogFromFileName(walFiles[0], &tli, &segno, wal_segment_size);
+        XLogFromFileName((char *) linitial(walFileList),
+                         &tli, &segno, wal_segment_size);
         if (segno != startsegno)
         {
             char        startfname[MAXFNAMELEN];
@@ -463,12 +454,13 @@ perform_base_backup(basebackup_options *opt)
             ereport(ERROR,
                     (errmsg("could not find WAL file \"%s\"", startfname)));
         }
-        for (i = 0; i < nWalFiles; i++)
+        foreach(lc, walFileList)
         {
+            char       *walFileName = (char *) lfirst(lc);
             XLogSegNo    currsegno = segno;
             XLogSegNo    nextsegno = segno + 1;

-            XLogFromFileName(walFiles[i], &tli, &segno, wal_segment_size);
+            XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
             if (!(nextsegno == segno || currsegno == segno))
             {
                 char        nextfname[MAXFNAMELEN];
@@ -489,15 +481,16 @@ perform_base_backup(basebackup_options *opt)
         }

         /* Ok, we have everything we need. Send the WAL files. */
-        for (i = 0; i < nWalFiles; i++)
+        foreach(lc, walFileList)
         {
+            char       *walFileName = (char *) lfirst(lc);
             FILE       *fp;
             char        buf[TAR_SEND_SIZE];
             size_t        cnt;
             pgoff_t        len = 0;

-            snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFiles[i]);
-            XLogFromFileName(walFiles[i], &tli, &segno, wal_segment_size);
+            snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFileName);
+            XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);

             fp = AllocateFile(pathbuf, "rb");
             if (fp == NULL)
@@ -527,7 +520,7 @@ perform_base_backup(basebackup_options *opt)
                 CheckXLogRemoved(segno, tli);
                 ereport(ERROR,
                         (errcode_for_file_access(),
-                         errmsg("unexpected WAL file size \"%s\"", walFiles[i])));
+                         errmsg("unexpected WAL file size \"%s\"", walFileName)));
             }

             /* send the WAL file itself */
@@ -555,7 +548,7 @@ perform_base_backup(basebackup_options *opt)
                 CheckXLogRemoved(segno, tli);
                 ereport(ERROR,
                         (errcode_for_file_access(),
-                         errmsg("unexpected WAL file size \"%s\"", walFiles[i])));
+                         errmsg("unexpected WAL file size \"%s\"", walFileName)));
             }

             /* wal_segment_size is a multiple of 512, so no need for padding */
@@ -568,7 +561,7 @@ perform_base_backup(basebackup_options *opt)
              * walreceiver.c always doing an XLogArchiveForceDone() after a
              * complete segment.
              */
-            StatusFilePath(pathbuf, walFiles[i], ".done");
+            StatusFilePath(pathbuf, walFileName, ".done");
             sendFileWithContent(pathbuf, "");
         }

@@ -618,14 +611,14 @@ perform_base_backup(basebackup_options *opt)
 }

 /*
- * qsort comparison function, to compare log/seg portion of WAL segment
+ * list_qsort comparison function, to compare log/seg portion of WAL segment
  * filenames, ignoring the timeline portion.
  */
 static int
-compareWalFileNames(const void *a, const void *b)
+compareWalFileNames(const ListCell *a, const ListCell *b)
 {
-    char       *fna = *((char **) a);
-    char       *fnb = *((char **) b);
+    char       *fna = (char *) lfirst(a);
+    char       *fnb = (char *) lfirst(b);

     return strcmp(fna + 8, fnb + 8);
 }
diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index 591377d..d7ee8eb 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -3378,13 +3378,13 @@ TransactionIdInArray(TransactionId xid, TransactionId *xip, Size num)
 }

 /*
- * qsort() comparator for sorting RewriteMappingFiles in LSN order.
+ * list_qsort() comparator for sorting RewriteMappingFiles in LSN order.
  */
 static int
-file_sort_by_lsn(const void *a_p, const void *b_p)
+file_sort_by_lsn(const ListCell *a_p, const ListCell *b_p)
 {
-    RewriteMappingFile *a = *(RewriteMappingFile **) a_p;
-    RewriteMappingFile *b = *(RewriteMappingFile **) b_p;
+    RewriteMappingFile *a = (RewriteMappingFile *) lfirst(a_p);
+    RewriteMappingFile *b = (RewriteMappingFile *) lfirst(b_p);

     if (a->lsn < b->lsn)
         return -1;
@@ -3404,8 +3404,6 @@ UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
     struct dirent *mapping_de;
     List       *files = NIL;
     ListCell   *file;
-    RewriteMappingFile **files_a;
-    size_t        off;
     Oid            dboid = IsSharedRelation(relid) ? InvalidOid : MyDatabaseId;

     mapping_dir = AllocateDir("pg_logical/mappings");
@@ -3459,21 +3457,12 @@ UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
     }
     FreeDir(mapping_dir);

-    /* build array we can easily sort */
-    files_a = palloc(list_length(files) * sizeof(RewriteMappingFile *));
-    off = 0;
-    foreach(file, files)
-    {
-        files_a[off++] = lfirst(file);
-    }
-
     /* sort files so we apply them in LSN order */
-    qsort(files_a, list_length(files), sizeof(RewriteMappingFile *),
-          file_sort_by_lsn);
+    list_qsort(files, file_sort_by_lsn);

-    for (off = 0; off < list_length(files); off++)
+    foreach(file, files)
     {
-        RewriteMappingFile *f = files_a[off];
+        RewriteMappingFile *f = (RewriteMappingFile *) lfirst(file);

         elog(DEBUG1, "applying mapping: \"%s\" in %u", f->fname,
              snapshot->subxip[0]);
diff --git a/src/backend/rewrite/rowsecurity.c b/src/backend/rewrite/rowsecurity.c
index 300af6f..2815300 100644
--- a/src/backend/rewrite/rowsecurity.c
+++ b/src/backend/rewrite/rowsecurity.c
@@ -63,9 +63,9 @@ static void get_policies_for_relation(Relation relation,
                                       List **permissive_policies,
                                       List **restrictive_policies);

-static List *sort_policies_by_name(List *policies);
+static void sort_policies_by_name(List *policies);

-static int    row_security_policy_cmp(const void *a, const void *b);
+static int    row_security_policy_cmp(const ListCell *a, const ListCell *b);

 static void add_security_quals(int rt_index,
                                List *permissive_policies,
@@ -470,7 +470,7 @@ get_policies_for_relation(Relation relation, CmdType cmd, Oid user_id,
      * We sort restrictive policies by name so that any WCOs they generate are
      * checked in a well-defined order.
      */
-    *restrictive_policies = sort_policies_by_name(*restrictive_policies);
+    sort_policies_by_name(*restrictive_policies);

     /*
      * Then add any permissive or restrictive policies defined by extensions.
@@ -488,7 +488,7 @@ get_policies_for_relation(Relation relation, CmdType cmd, Oid user_id,
          * always check all built-in restrictive policies, in name order,
          * before checking restrictive policies added by hooks, in name order.
          */
-        hook_policies = sort_policies_by_name(hook_policies);
+        sort_policies_by_name(hook_policies);

         foreach(item, hook_policies)
         {
@@ -522,43 +522,20 @@ get_policies_for_relation(Relation relation, CmdType cmd, Oid user_id,
  * This is not necessary for permissive policies, since they are all combined
  * together using OR into a single WithCheckOption check.
  */
-static List *
+static void
 sort_policies_by_name(List *policies)
 {
-    int            npol = list_length(policies);
-    RowSecurityPolicy *pols;
-    ListCell   *item;
-    int            ii = 0;
-
-    if (npol <= 1)
-        return policies;
-
-    pols = (RowSecurityPolicy *) palloc(sizeof(RowSecurityPolicy) * npol);
-
-    foreach(item, policies)
-    {
-        RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item);
-
-        pols[ii++] = *policy;
-    }
-
-    qsort(pols, npol, sizeof(RowSecurityPolicy), row_security_policy_cmp);
-
-    policies = NIL;
-    for (ii = 0; ii < npol; ii++)
-        policies = lappend(policies, &pols[ii]);
-
-    return policies;
+    list_qsort(policies, row_security_policy_cmp);
 }

 /*
  * qsort comparator to sort RowSecurityPolicy entries by name
  */
 static int
-row_security_policy_cmp(const void *a, const void *b)
+row_security_policy_cmp(const ListCell *a, const ListCell *b)
 {
-    const RowSecurityPolicy *pa = (const RowSecurityPolicy *) a;
-    const RowSecurityPolicy *pb = (const RowSecurityPolicy *) b;
+    const RowSecurityPolicy *pa = (const RowSecurityPolicy *) lfirst(a);
+    const RowSecurityPolicy *pb = (const RowSecurityPolicy *) lfirst(b);

     /* Guard against NULL policy names from extensions */
     if (pa->policy_name == NULL)
diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h
index 8b1e4fb..8ed0a64 100644
--- a/src/include/nodes/pg_list.h
+++ b/src/include/nodes/pg_list.h
@@ -570,7 +570,7 @@ extern List *list_copy(const List *list);
 extern List *list_copy_tail(const List *list, int nskip);
 extern List *list_copy_deep(const List *oldlist);

-typedef int (*list_qsort_comparator) (const void *a, const void *b);
-extern List *list_qsort(const List *list, list_qsort_comparator cmp);
+typedef int (*list_qsort_comparator) (const ListCell *a, const ListCell *b);
+extern void list_qsort(List *list, list_qsort_comparator cmp);

 #endif                            /* PG_LIST_H */

pgsql-hackers by date:

Previous
From: Bruce Momjian
Date:
Subject: Re: [Proposal] Table-level Transparent Data Encryption (TDE) and KeyManagement Service (KMS)
Next
From: Robert Eckhardt
Date:
Subject: Re: Creating partitions automatically at least on HASH?