Re: POC: converting Lists into arrays - Mailing list pgsql-hackers

From Tom Lane
Subject Re: POC: converting Lists into arrays
Date
Msg-id 26193.1563228600@sss.pgh.pa.us
Whole thread Raw
In response to Re: POC: converting Lists into arrays  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-hackers
I wrote:
> [ list_qsort-API-change.patch ]

Also, here's a follow-on patch that cleans up some crufty code in
heap.c and relcache.c to use list_qsort, rather than manually doing
insertions into a list that's kept ordered.  The existing comments
argue that that's faster than qsort for small N, but I think that's
a bit questionable.  And anyway, that code is definitely O(N^2) if
N isn't so small, while this replacement logic is O(N log N).

This incidentally removes the only two calls of lappend_cell_oid.
There are no callers of lappend_cell_int, and only one of lappend_cell,
and that one would be noticeably cleaner if it were rewritten to use
list_insert_nth instead.  So I'm a bit tempted to do so and then nuke
all three of those functions, which would at least make some tiny dent
in Andres' unhappiness with the messiness of the List API.  Thoughts?

            regards, tom lane

diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index ab8a475..43d4252 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -125,7 +125,6 @@ static void SetRelationNumChecks(Relation rel, int numchecks);
 static Node *cookConstraint(ParseState *pstate,
                             Node *raw_constraint,
                             char *relname);
-static List *insert_ordered_unique_oid(List *list, Oid datum);


 /* ----------------------------------------------------------------
@@ -3384,55 +3383,19 @@ heap_truncate_find_FKs(List *relationIds)
         if (!list_member_oid(relationIds, con->confrelid))
             continue;

-        /* Add referencer unless already in input or result list */
+        /* Add referencer to result, unless present in input list */
         if (!list_member_oid(relationIds, con->conrelid))
-            result = insert_ordered_unique_oid(result, con->conrelid);
+            result = lappend_oid(result, con->conrelid);
     }

     systable_endscan(fkeyScan);
     table_close(fkeyRel, AccessShareLock);

-    return result;
-}
+    /* Now sort and de-duplicate the result list */
+    list_qsort(result, list_oid_cmp);
+    list_deduplicate_oid(result);

-/*
- * insert_ordered_unique_oid
- *        Insert a new Oid into a sorted list of Oids, preserving ordering,
- *        and eliminating duplicates
- *
- * Building the ordered list this way is O(N^2), but with a pretty small
- * constant, so for the number of entries we expect it will probably be
- * faster than trying to apply qsort().  It seems unlikely someone would be
- * trying to truncate a table with thousands of dependent tables ...
- */
-static List *
-insert_ordered_unique_oid(List *list, Oid datum)
-{
-    ListCell   *prev;
-
-    /* Does the datum belong at the front? */
-    if (list == NIL || datum < linitial_oid(list))
-        return lcons_oid(datum, list);
-    /* Does it match the first entry? */
-    if (datum == linitial_oid(list))
-        return list;            /* duplicate, so don't insert */
-    /* No, so find the entry it belongs after */
-    prev = list_head(list);
-    for (;;)
-    {
-        ListCell   *curr = lnext(list, prev);
-
-        if (curr == NULL || datum < lfirst_oid(curr))
-            break;                /* it belongs after 'prev', before 'curr' */
-
-        if (datum == lfirst_oid(curr))
-            return list;        /* duplicate, so don't insert */
-
-        prev = curr;
-    }
-    /* Insert datum into list after 'prev' */
-    lappend_cell_oid(list, prev, datum);
-    return list;
+    return result;
 }

 /*
diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c
index 8a7eb6a..c16b220 100644
--- a/src/backend/nodes/list.c
+++ b/src/backend/nodes/list.c
@@ -1298,6 +1298,34 @@ list_concat_unique_oid(List *list1, const List *list2)
 }

 /*
+ * Remove adjacent duplicates in a list of OIDs.
+ *
+ * It is caller's responsibility to have sorted the list to bring duplicates
+ * together, perhaps via list_qsort(list, list_oid_cmp).
+ */
+void
+list_deduplicate_oid(List *list)
+{
+    int            len;
+
+    Assert(IsOidList(list));
+    len = list_length(list);
+    if (len > 1)
+    {
+        ListCell   *elements = list->elements;
+        int            i = 0;
+
+        for (int j = 1; j < len; j++)
+        {
+            if (elements[i].oid_value != elements[j].oid_value)
+                elements[++i].oid_value = elements[j].oid_value;
+        }
+        list->length = i + 1;
+    }
+    check_list_invariants(list);
+}
+
+/*
  * Free all storage in a list, and optionally the pointed-to elements
  */
 static void
@@ -1440,3 +1468,19 @@ list_qsort(List *list, list_qsort_comparator cmp)
     if (len > 1)
         qsort(list->elements, len, sizeof(ListCell), (qsort_comparator) cmp);
 }
+
+/*
+ * list_qsort comparator for sorting a list into ascending OID order.
+ */
+int
+list_oid_cmp(const ListCell *p1, const ListCell *p2)
+{
+    Oid            v1 = lfirst_oid(p1);
+    Oid            v2 = lfirst_oid(p2);
+
+    if (v1 < v2)
+        return -1;
+    if (v1 > v2)
+        return 1;
+    return 0;
+}
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 3ca9a9f..13ce2b6 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -285,7 +285,6 @@ static TupleDesc GetPgIndexDescriptor(void);
 static void AttrDefaultFetch(Relation relation);
 static void CheckConstraintFetch(Relation relation);
 static int    CheckConstraintCmp(const void *a, const void *b);
-static List *insert_ordered_oid(List *list, Oid datum);
 static void InitIndexAmRoutine(Relation relation);
 static void IndexSupportInitialize(oidvector *indclass,
                                    RegProcedure *indexSupport,
@@ -4387,8 +4386,8 @@ RelationGetIndexList(Relation relation)
         if (!index->indislive)
             continue;

-        /* Add index's OID to result list in the proper order */
-        result = insert_ordered_oid(result, index->indexrelid);
+        /* add index's OID to result list */
+        result = lappend_oid(result, index->indexrelid);

         /*
          * Invalid, non-unique, non-immediate or predicate indexes aren't
@@ -4413,6 +4412,9 @@ RelationGetIndexList(Relation relation)

     table_close(indrel, AccessShareLock);

+    /* Sort the result list into OID order, per API spec. */
+    list_qsort(result, list_oid_cmp);
+
     /* Now save a copy of the completed list in the relcache entry. */
     oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
     oldlist = relation->rd_indexlist;
@@ -4494,13 +4496,16 @@ RelationGetStatExtList(Relation relation)
     {
         Oid            oid = ((Form_pg_statistic_ext) GETSTRUCT(htup))->oid;

-        result = insert_ordered_oid(result, oid);
+        result = lappend_oid(result, oid);
     }

     systable_endscan(indscan);

     table_close(indrel, AccessShareLock);

+    /* Sort the result list into OID order, per API spec. */
+    list_qsort(result, list_oid_cmp);
+
     /* Now save a copy of the completed list in the relcache entry. */
     oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
     oldlist = relation->rd_statlist;
@@ -4516,39 +4521,6 @@ RelationGetStatExtList(Relation relation)
 }

 /*
- * insert_ordered_oid
- *        Insert a new Oid into a sorted list of Oids, preserving ordering
- *
- * Building the ordered list this way is O(N^2), but with a pretty small
- * constant, so for the number of entries we expect it will probably be
- * faster than trying to apply qsort().  Most tables don't have very many
- * indexes...
- */
-static List *
-insert_ordered_oid(List *list, Oid datum)
-{
-    ListCell   *prev;
-
-    /* Does the datum belong at the front? */
-    if (list == NIL || datum < linitial_oid(list))
-        return lcons_oid(datum, list);
-    /* No, so find the entry it belongs after */
-    prev = list_head(list);
-    for (;;)
-    {
-        ListCell   *curr = lnext(list, prev);
-
-        if (curr == NULL || datum < lfirst_oid(curr))
-            break;                /* it belongs after 'prev', before 'curr' */
-
-        prev = curr;
-    }
-    /* Insert datum into list after 'prev' */
-    lappend_cell_oid(list, prev, datum);
-    return list;
-}
-
-/*
  * RelationGetPrimaryKeyIndex -- get OID of the relation's primary key index
  *
  * Returns InvalidOid if there is no such index.
diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h
index 8ed0a64..33f39ba 100644
--- a/src/include/nodes/pg_list.h
+++ b/src/include/nodes/pg_list.h
@@ -563,6 +563,8 @@ extern List *list_concat_unique_ptr(List *list1, const List *list2);
 extern List *list_concat_unique_int(List *list1, const List *list2);
 extern List *list_concat_unique_oid(List *list1, const List *list2);

+extern void list_deduplicate_oid(List *list);
+
 extern void list_free(List *list);
 extern void list_free_deep(List *list);

@@ -573,4 +575,6 @@ extern List *list_copy_deep(const List *oldlist);
 typedef int (*list_qsort_comparator) (const ListCell *a, const ListCell *b);
 extern void list_qsort(List *list, list_qsort_comparator cmp);

+extern int    list_oid_cmp(const ListCell *p1, const ListCell *p2);
+
 #endif                            /* PG_LIST_H */

pgsql-hackers by date:

Previous
From: Sehrope Sarkuni
Date:
Subject: Re: [Proposal] Table-level Transparent Data Encryption (TDE) and KeyManagement Service (KMS)
Next
From: Bruce Momjian
Date:
Subject: Re: [Proposal] Table-level Transparent Data Encryption (TDE) and KeyManagement Service (KMS)