Home > mailing lists
Experimenting with hash tables inside pg_dump - Mailing list pgsql-hackers

From	Tom Lane
Subject	Experimenting with hash tables inside pg_dump
Date	October 21, 2021 22:27:25
Msg-id	2595220.1634855245@sss.pgh.pa.us Whole thread Raw
Responses	Re: Experimenting with hash tables inside pg_dump Re: Experimenting with hash tables inside pg_dump
List	pgsql-hackers
Tree view
Today, pg_dump does a lot of internal lookups via binary search
in presorted arrays.  I thought it might improve matters
to replace those binary searches with hash tables, theoretically
converting O(log N) searches into O(1) searches.  So I tried making
a hash table indexed by CatalogId (tableoid+oid) with simplehash.h,
and replacing as many data structures as I could with that.

This makes the code shorter and (IMO anyway) cleaner, but

(a) the executable size increases by a few KB --- apparently, even
the minimum subset of simplehash.h's functionality is code-wasteful.

(b) I couldn't measure any change in performance at all.  I tried
it on the regression database and on a toy DB with 10000 simple
tables.  Maybe on a really large DB you'd notice some difference,
but I'm not very optimistic now.

So this experiment feels like a failure, but I thought I'd post
the patch and results for the archives' sake.  Maybe somebody
will think of a way to improve matters.  Or maybe it's worth
doing just to shorten the code?

            regards, tom lane

diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c
index 1f24e79665..49932fd598 100644
--- a/src/bin/pg_dump/common.c
+++ b/src/bin/pg_dump/common.c
@@ -18,6 +18,14 @@
 #include <ctype.h>

 #include "catalog/pg_class_d.h"
+#include "catalog/pg_collation_d.h"
+#include "catalog/pg_extension_d.h"
+#include "catalog/pg_namespace_d.h"
+#include "catalog/pg_operator_d.h"
+#include "catalog/pg_proc_d.h"
+#include "catalog/pg_publication_d.h"
+#include "catalog/pg_type_d.h"
+#include "common/hashfn.h"
 #include "fe_utils/string_utils.h"
 #include "pg_backup_archiver.h"
 #include "pg_backup_utils.h"
@@ -31,54 +39,54 @@ static int    allocedDumpIds = 0;
 static DumpId lastDumpId = 0;    /* Note: 0 is InvalidDumpId */

 /*
- * Variables for mapping CatalogId to DumpableObject
- */
-static bool catalogIdMapValid = false;
-static DumpableObject **catalogIdMap = NULL;
-static int    numCatalogIds = 0;
-
-/*
- * These variables are static to avoid the notational cruft of having to pass
- * them into findTableByOid() and friends.  For each of these arrays, we build
- * a sorted-by-OID index array immediately after the objects are fetched,
- * and then we use binary search in findTableByOid() and friends.  (qsort'ing
- * the object arrays themselves would be simpler, but it doesn't work because
- * pg_dump.c may have already established pointers between items.)
+ * Infrastructure for mapping CatalogId to DumpableObject
+ *
+ * We use a hash table generated by simplehash.h.  That infrastructure
+ * requires all the hash table entries to be the same size, and it also
+ * expects that it can move them around when resizing the table.  So we
+ * cannot make the DumpableObjects be elements of the hash table directly;
+ * instead, the hash table elements contain pointers to DumpableObjects.
+ *
+ * It turns out to be convenient to also use this data structure to map
+ * CatalogIds to owning extensions, if any.  Since extension membership
+ * data is read before creating most DumpableObjects, either one of dobj
+ * and ext could be NULL.
  */
-static DumpableObject **tblinfoindex;
-static DumpableObject **typinfoindex;
-static DumpableObject **funinfoindex;
-static DumpableObject **oprinfoindex;
-static DumpableObject **collinfoindex;
-static DumpableObject **nspinfoindex;
-static DumpableObject **extinfoindex;
-static DumpableObject **pubinfoindex;
-static int    numTables;
-static int    numTypes;
-static int    numFuncs;
-static int    numOperators;
-static int    numCollations;
-static int    numNamespaces;
-static int    numExtensions;
-static int    numPublications;
-
-/* This is an array of object identities, not actual DumpableObjects */
-static ExtensionMemberId *extmembers;
-static int    numextmembers;
+typedef struct _catalogIdMapEntry
+{
+    CatalogId    catId;            /* the indexed CatalogId */
+    uint32        status;            /* hash status */
+    uint32        hashval;        /* hash code for the CatalogId */
+    DumpableObject *dobj;        /* the associated DumpableObject, if any */
+    ExtensionInfo *ext;            /* owning extension, if any */
+} CatalogIdMapEntry;
+
+#define SH_PREFIX        catalogid
+#define SH_ELEMENT_TYPE    CatalogIdMapEntry
+#define SH_KEY_TYPE        CatalogId
+#define    SH_KEY            catId
+#define SH_HASH_KEY(tb, key)    hash_bytes((const unsigned char *) &(key), sizeof(CatalogId))
+#define SH_EQUAL(tb, a, b)        ((a).oid == (b).oid && (a).tableoid == (b).tableoid)
+#define SH_STORE_HASH
+#define SH_GET_HASH(tb, a) (a)->hashval
+#define    SH_SCOPE        static inline
+#define SH_RAW_ALLOCATOR    pg_malloc0
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+#define CATALOGIDHASH_INITIAL_SIZE    10000
+
+static catalogid_hash *catalogIdHash = NULL;

 static void flagInhTables(Archive *fout, TableInfo *tbinfo, int numTables,
                           InhInfo *inhinfo, int numInherits);
 static void flagInhIndexes(Archive *fout, TableInfo *tblinfo, int numTables);
 static void flagInhAttrs(DumpOptions *dopt, TableInfo *tblinfo, int numTables);
-static DumpableObject **buildIndexArray(void *objArray, int numObjs,
-                                        Size objSize);
-static int    DOCatalogIdCompare(const void *p1, const void *p2);
-static int    ExtensionMemberIdCompare(const void *p1, const void *p2);
 static void findParentsByOid(TableInfo *self,
                              InhInfo *inhinfo, int numInherits);
 static int    strInArray(const char *pattern, char **arr, int arr_size);
-static IndxInfo *findIndexByOid(Oid oid, DumpableObject **idxinfoindex,
-                                int numIndexes);
+static IndxInfo *findIndexByOid(Oid oid);


 /*
@@ -89,14 +97,16 @@ TableInfo *
 getSchemaData(Archive *fout, int *numTablesPtr)
 {
     TableInfo  *tblinfo;
-    TypeInfo   *typinfo;
-    FuncInfo   *funinfo;
-    OprInfo    *oprinfo;
-    CollInfo   *collinfo;
-    NamespaceInfo *nspinfo;
     ExtensionInfo *extinfo;
-    PublicationInfo *pubinfo;
     InhInfo    *inhinfo;
+    int            numTables;
+    int            numTypes;
+    int            numFuncs;
+    int            numOperators;
+    int            numCollations;
+    int            numNamespaces;
+    int            numExtensions;
+    int            numPublications;
     int            numAggregates;
     int            numInherits;
     int            numRules;
@@ -123,14 +133,12 @@ getSchemaData(Archive *fout, int *numTablesPtr)
      */
     pg_log_info("reading extensions");
     extinfo = getExtensions(fout, &numExtensions);
-    extinfoindex = buildIndexArray(extinfo, numExtensions, sizeof(ExtensionInfo));

     pg_log_info("identifying extension members");
     getExtensionMembership(fout, extinfo, numExtensions);

     pg_log_info("reading schemas");
-    nspinfo = getNamespaces(fout, &numNamespaces);
-    nspinfoindex = buildIndexArray(nspinfo, numNamespaces, sizeof(NamespaceInfo));
+    (void) getNamespaces(fout, &numNamespaces);

     /*
      * getTables should be done as soon as possible, so as to minimize the
@@ -140,19 +148,15 @@ getSchemaData(Archive *fout, int *numTablesPtr)
      */
     pg_log_info("reading user-defined tables");
     tblinfo = getTables(fout, &numTables);
-    tblinfoindex = buildIndexArray(tblinfo, numTables, sizeof(TableInfo));

-    /* Do this after we've built tblinfoindex */
     getOwnedSeqs(fout, tblinfo, numTables);

     pg_log_info("reading user-defined functions");
-    funinfo = getFuncs(fout, &numFuncs);
-    funinfoindex = buildIndexArray(funinfo, numFuncs, sizeof(FuncInfo));
+    (void) getFuncs(fout, &numFuncs);

     /* this must be after getTables and getFuncs */
     pg_log_info("reading user-defined types");
-    typinfo = getTypes(fout, &numTypes);
-    typinfoindex = buildIndexArray(typinfo, numTypes, sizeof(TypeInfo));
+    (void) getTypes(fout, &numTypes);

     /* this must be after getFuncs, too */
     pg_log_info("reading procedural languages");
@@ -162,8 +166,7 @@ getSchemaData(Archive *fout, int *numTablesPtr)
     getAggregates(fout, &numAggregates);

     pg_log_info("reading user-defined operators");
-    oprinfo = getOperators(fout, &numOperators);
-    oprinfoindex = buildIndexArray(oprinfo, numOperators, sizeof(OprInfo));
+    (void) getOperators(fout, &numOperators);

     pg_log_info("reading user-defined access methods");
     getAccessMethods(fout, &numAccessMethods);
@@ -196,8 +199,7 @@ getSchemaData(Archive *fout, int *numTablesPtr)
     getDefaultACLs(fout, &numDefaultACLs);

     pg_log_info("reading user-defined collations");
-    collinfo = getCollations(fout, &numCollations);
-    collinfoindex = buildIndexArray(collinfo, numCollations, sizeof(CollInfo));
+    (void) getCollations(fout, &numCollations);

     pg_log_info("reading user-defined conversions");
     getConversions(fout, &numConversions);
@@ -250,9 +252,7 @@ getSchemaData(Archive *fout, int *numTablesPtr)
     getPolicies(fout, tblinfo, numTables);

     pg_log_info("reading publications");
-    pubinfo = getPublications(fout, &numPublications);
-    pubinfoindex = buildIndexArray(pubinfo, numPublications,
-                                   sizeof(PublicationInfo));
+    (void) getPublications(fout, &numPublications);

     pg_log_info("reading publication membership");
     getPublicationTables(fout, tblinfo, numTables);
@@ -375,34 +375,15 @@ flagInhIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
     int            i,
                 j,
                 k;
-    DumpableObject ***parentIndexArray;
-
-    parentIndexArray = (DumpableObject ***)
-        pg_malloc0(getMaxDumpId() * sizeof(DumpableObject **));

     for (i = 0; i < numTables; i++)
     {
-        TableInfo  *parenttbl;
         IndexAttachInfo *attachinfo;

         if (!tblinfo[i].ispartition || tblinfo[i].numParents == 0)
             continue;

         Assert(tblinfo[i].numParents == 1);
-        parenttbl = tblinfo[i].parents[0];
-
-        /*
-         * We need access to each parent table's index list, but there is no
-         * index to cover them outside of this function.  To avoid having to
-         * sort every parent table's indexes each time we come across each of
-         * its partitions, create an indexed array for each parent the first
-         * time it is required.
-         */
-        if (parentIndexArray[parenttbl->dobj.dumpId] == NULL)
-            parentIndexArray[parenttbl->dobj.dumpId] =
-                buildIndexArray(parenttbl->indexes,
-                                parenttbl->numIndexes,
-                                sizeof(IndxInfo));

         attachinfo = (IndexAttachInfo *)
             pg_malloc0(tblinfo[i].numIndexes * sizeof(IndexAttachInfo));
@@ -414,9 +395,7 @@ flagInhIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
             if (index->parentidx == 0)
                 continue;

-            parentidx = findIndexByOid(index->parentidx,
-                                       parentIndexArray[parenttbl->dobj.dumpId],
-                                       parenttbl->numIndexes);
+            parentidx = findIndexByOid(index->parentidx);
             if (parentidx == NULL)
                 continue;

@@ -457,11 +436,6 @@ flagInhIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
             k++;
         }
     }
-
-    for (i = 0; i < numTables; i++)
-        if (parentIndexArray[i])
-            pg_free(parentIndexArray[i]);
-    pg_free(parentIndexArray);
 }

 /* flagInhAttrs -
@@ -596,7 +570,7 @@ flagInhAttrs(DumpOptions *dopt, TableInfo *tblinfo, int numTables)
 /*
  * AssignDumpId
  *        Given a newly-created dumpable object, assign a dump ID,
- *        and enter the object into the lookup table.
+ *        and enter the object into the lookup tables.
  *
  * The caller is expected to have filled in objType and catId,
  * but not any of the other standard fields of a DumpableObject.
@@ -615,6 +589,7 @@ AssignDumpId(DumpableObject *dobj)
     dobj->nDeps = 0;
     dobj->allocDeps = 0;

+    /* Add object to dumpIdMap[], enlarging that array if need be */
     while (dobj->dumpId >= allocedDumpIds)
     {
         int            newAlloc;
@@ -637,8 +612,25 @@ AssignDumpId(DumpableObject *dobj)
     }
     dumpIdMap[dobj->dumpId] = dobj;

-    /* mark catalogIdMap invalid, but don't rebuild it yet */
-    catalogIdMapValid = false;
+    /* If it has a valid CatalogId, enter it into the hash table */
+    if (OidIsValid(dobj->catId.tableoid))
+    {
+        CatalogIdMapEntry *entry;
+        bool        found;
+
+        /* Initialize CatalogId hash table if not done yet */
+        if (catalogIdHash == NULL)
+            catalogIdHash = catalogid_create(CATALOGIDHASH_INITIAL_SIZE, NULL);
+
+        entry = catalogid_insert(catalogIdHash, dobj->catId, &found);
+        if (!found)
+        {
+            entry->dobj = NULL;
+            entry->ext = NULL;
+        }
+        Assert(entry->dobj == NULL);
+        entry->dobj = dobj;
+    }
 }

 /*
@@ -679,140 +671,19 @@ findObjectByDumpId(DumpId dumpId)
  * Find a DumpableObject by catalog ID
  *
  * Returns NULL for unknown ID
- *
- * We use binary search in a sorted list that is built on first call.
- * If AssignDumpId() and findObjectByCatalogId() calls were freely intermixed,
- * the code would work, but possibly be very slow.  In the current usage
- * pattern that does not happen, indeed we build the list at most twice.
  */
 DumpableObject *
 findObjectByCatalogId(CatalogId catalogId)
 {
-    DumpableObject **low;
-    DumpableObject **high;
-
-    if (!catalogIdMapValid)
-    {
-        if (catalogIdMap)
-            free(catalogIdMap);
-        getDumpableObjects(&catalogIdMap, &numCatalogIds);
-        if (numCatalogIds > 1)
-            qsort((void *) catalogIdMap, numCatalogIds,
-                  sizeof(DumpableObject *), DOCatalogIdCompare);
-        catalogIdMapValid = true;
-    }
-
-    /*
-     * We could use bsearch() here, but the notational cruft of calling
-     * bsearch is nearly as bad as doing it ourselves; and the generalized
-     * bsearch function is noticeably slower as well.
-     */
-    if (numCatalogIds <= 0)
-        return NULL;
-    low = catalogIdMap;
-    high = catalogIdMap + (numCatalogIds - 1);
-    while (low <= high)
-    {
-        DumpableObject **middle;
-        int            difference;
-
-        middle = low + (high - low) / 2;
-        /* comparison must match DOCatalogIdCompare, below */
-        difference = oidcmp((*middle)->catId.oid, catalogId.oid);
-        if (difference == 0)
-            difference = oidcmp((*middle)->catId.tableoid, catalogId.tableoid);
-        if (difference == 0)
-            return *middle;
-        else if (difference < 0)
-            low = middle + 1;
-        else
-            high = middle - 1;
-    }
-    return NULL;
-}
-
-/*
- * Find a DumpableObject by OID, in a pre-sorted array of one type of object
- *
- * Returns NULL for unknown OID
- */
-static DumpableObject *
-findObjectByOid(Oid oid, DumpableObject **indexArray, int numObjs)
-{
-    DumpableObject **low;
-    DumpableObject **high;
+    CatalogIdMapEntry *entry;

-    /*
-     * This is the same as findObjectByCatalogId except we assume we need not
-     * look at table OID because the objects are all the same type.
-     *
-     * We could use bsearch() here, but the notational cruft of calling
-     * bsearch is nearly as bad as doing it ourselves; and the generalized
-     * bsearch function is noticeably slower as well.
-     */
-    if (numObjs <= 0)
-        return NULL;
-    low = indexArray;
-    high = indexArray + (numObjs - 1);
-    while (low <= high)
-    {
-        DumpableObject **middle;
-        int            difference;
-
-        middle = low + (high - low) / 2;
-        difference = oidcmp((*middle)->catId.oid, oid);
-        if (difference == 0)
-            return *middle;
-        else if (difference < 0)
-            low = middle + 1;
-        else
-            high = middle - 1;
-    }
-    return NULL;
-}
-
-/*
- * Build an index array of DumpableObject pointers, sorted by OID
- */
-static DumpableObject **
-buildIndexArray(void *objArray, int numObjs, Size objSize)
-{
-    DumpableObject **ptrs;
-    int            i;
+    if (catalogIdHash == NULL)
+        return NULL;            /* no objects exist yet */

-    if (numObjs <= 0)
+    entry = catalogid_lookup(catalogIdHash, catalogId);
+    if (entry == NULL)
         return NULL;
-
-    ptrs = (DumpableObject **) pg_malloc(numObjs * sizeof(DumpableObject *));
-    for (i = 0; i < numObjs; i++)
-        ptrs[i] = (DumpableObject *) ((char *) objArray + i * objSize);
-
-    /* We can use DOCatalogIdCompare to sort since its first key is OID */
-    if (numObjs > 1)
-        qsort((void *) ptrs, numObjs, sizeof(DumpableObject *),
-              DOCatalogIdCompare);
-
-    return ptrs;
-}
-
-/*
- * qsort comparator for pointers to DumpableObjects
- */
-static int
-DOCatalogIdCompare(const void *p1, const void *p2)
-{
-    const DumpableObject *obj1 = *(DumpableObject *const *) p1;
-    const DumpableObject *obj2 = *(DumpableObject *const *) p2;
-    int            cmpval;
-
-    /*
-     * Compare OID first since it's usually unique, whereas there will only be
-     * a few distinct values of tableoid.
-     */
-    cmpval = oidcmp(obj1->catId.oid, obj2->catId.oid);
-    if (cmpval == 0)
-        cmpval = oidcmp(obj1->catId.tableoid, obj2->catId.tableoid);
-    return cmpval;
+    return entry->dobj;
 }

 /*
@@ -886,119 +757,169 @@ removeObjectDependency(DumpableObject *dobj, DumpId refId)

 /*
  * findTableByOid
- *      finds the entry (in tblinfo) of the table with the given oid
+ *      finds the DumpableObject for the table with the given oid
  *      returns NULL if not found
  */
 TableInfo *
 findTableByOid(Oid oid)
 {
-    return (TableInfo *) findObjectByOid(oid, tblinfoindex, numTables);
+    CatalogId    catId;
+    DumpableObject *dobj;
+
+    catId.tableoid = RelationRelationId;
+    catId.oid = oid;
+    dobj = findObjectByCatalogId(catId);
+    Assert(dobj == NULL || dobj->objType == DO_TABLE);
+    return (TableInfo *) dobj;
+}
+
+/*
+ * findIndexByOid
+ *      finds the DumpableObject for the index with the given oid
+ *      returns NULL if not found
+ */
+static IndxInfo *
+findIndexByOid(Oid oid)
+{
+    CatalogId    catId;
+    DumpableObject *dobj;
+
+    catId.tableoid = RelationRelationId;
+    catId.oid = oid;
+    dobj = findObjectByCatalogId(catId);
+    Assert(dobj == NULL || dobj->objType == DO_INDEX);
+    return (IndxInfo *) dobj;
 }

 /*
  * findTypeByOid
- *      finds the entry (in typinfo) of the type with the given oid
+ *      finds the DumpableObject for the type with the given oid
  *      returns NULL if not found
  */
 TypeInfo *
 findTypeByOid(Oid oid)
 {
-    return (TypeInfo *) findObjectByOid(oid, typinfoindex, numTypes);
+    CatalogId    catId;
+
+    catId.tableoid = TypeRelationId;
+    catId.oid = oid;
+    return (TypeInfo *) findObjectByCatalogId(catId);
 }

 /*
  * findFuncByOid
- *      finds the entry (in funinfo) of the function with the given oid
+ *      finds the DumpableObject for the function with the given oid
  *      returns NULL if not found
  */
 FuncInfo *
 findFuncByOid(Oid oid)
 {
-    return (FuncInfo *) findObjectByOid(oid, funinfoindex, numFuncs);
+    CatalogId    catId;
+
+    catId.tableoid = ProcedureRelationId;
+    catId.oid = oid;
+    return (FuncInfo *) findObjectByCatalogId(catId);
 }

 /*
  * findOprByOid
- *      finds the entry (in oprinfo) of the operator with the given oid
+ *      finds the DumpableObject for the operator with the given oid
  *      returns NULL if not found
  */
 OprInfo *
 findOprByOid(Oid oid)
 {
-    return (OprInfo *) findObjectByOid(oid, oprinfoindex, numOperators);
+    CatalogId    catId;
+
+    catId.tableoid = OperatorRelationId;
+    catId.oid = oid;
+    return (OprInfo *) findObjectByCatalogId(catId);
 }

 /*
  * findCollationByOid
- *      finds the entry (in collinfo) of the collation with the given oid
+ *      finds the DumpableObject for the collation with the given oid
  *      returns NULL if not found
  */
 CollInfo *
 findCollationByOid(Oid oid)
 {
-    return (CollInfo *) findObjectByOid(oid, collinfoindex, numCollations);
+    CatalogId    catId;
+
+    catId.tableoid = CollationRelationId;
+    catId.oid = oid;
+    return (CollInfo *) findObjectByCatalogId(catId);
 }

 /*
  * findNamespaceByOid
- *      finds the entry (in nspinfo) of the namespace with the given oid
+ *      finds the DumpableObject for the namespace with the given oid
  *      returns NULL if not found
  */
 NamespaceInfo *
 findNamespaceByOid(Oid oid)
 {
-    return (NamespaceInfo *) findObjectByOid(oid, nspinfoindex, numNamespaces);
+    CatalogId    catId;
+
+    catId.tableoid = NamespaceRelationId;
+    catId.oid = oid;
+    return (NamespaceInfo *) findObjectByCatalogId(catId);
 }

 /*
  * findExtensionByOid
- *      finds the entry (in extinfo) of the extension with the given oid
+ *      finds the DumpableObject for the extension with the given oid
  *      returns NULL if not found
  */
 ExtensionInfo *
 findExtensionByOid(Oid oid)
 {
-    return (ExtensionInfo *) findObjectByOid(oid, extinfoindex, numExtensions);
+    CatalogId    catId;
+
+    catId.tableoid = ExtensionRelationId;
+    catId.oid = oid;
+    return (ExtensionInfo *) findObjectByCatalogId(catId);
 }

 /*
  * findPublicationByOid
- *      finds the entry (in pubinfo) of the publication with the given oid
+ *      finds the DumpableObject for the publication with the given oid
  *      returns NULL if not found
  */
 PublicationInfo *
 findPublicationByOid(Oid oid)
 {
-    return (PublicationInfo *) findObjectByOid(oid, pubinfoindex, numPublications);
-}
+    CatalogId    catId;

-/*
- * findIndexByOid
- *        find the entry of the index with the given oid
- *
- * This one's signature is different from the previous ones because we lack a
- * global array of all indexes, so caller must pass their array as argument.
- */
-static IndxInfo *
-findIndexByOid(Oid oid, DumpableObject **idxinfoindex, int numIndexes)
-{
-    return (IndxInfo *) findObjectByOid(oid, idxinfoindex, numIndexes);
+    catId.tableoid = PublicationRelationId;
+    catId.oid = oid;
+    return (PublicationInfo *) findObjectByCatalogId(catId);
 }

+
 /*
- * setExtensionMembership
- *      accept and save data about which objects belong to extensions
+ * recordExtensionMembership
+ *      Record that the object identified by the given catalog ID
+ *      belongs to the given extension
  */
 void
-setExtensionMembership(ExtensionMemberId *extmems, int nextmems)
+recordExtensionMembership(CatalogId catId, ExtensionInfo *ext)
 {
-    /* Sort array in preparation for binary searches */
-    if (nextmems > 1)
-        qsort((void *) extmems, nextmems, sizeof(ExtensionMemberId),
-              ExtensionMemberIdCompare);
-    /* And save */
-    extmembers = extmems;
-    numextmembers = nextmems;
+    CatalogIdMapEntry *entry;
+    bool        found;
+
+    /* CatalogId hash table must exist, if we have an ExtensionInfo */
+    Assert(catalogIdHash != NULL);
+
+    /* Add reference to CatalogId hash */
+    entry = catalogid_insert(catalogIdHash, catId, &found);
+    if (!found)
+    {
+        entry->dobj = NULL;
+        entry->ext = NULL;
+    }
+    Assert(entry->ext == NULL);
+    entry->ext = ext;
 }

 /*
@@ -1008,56 +929,15 @@ setExtensionMembership(ExtensionMemberId *extmems, int nextmems)
 ExtensionInfo *
 findOwningExtension(CatalogId catalogId)
 {
-    ExtensionMemberId *low;
-    ExtensionMemberId *high;
+    CatalogIdMapEntry *entry;

-    /*
-     * We could use bsearch() here, but the notational cruft of calling
-     * bsearch is nearly as bad as doing it ourselves; and the generalized
-     * bsearch function is noticeably slower as well.
-     */
-    if (numextmembers <= 0)
-        return NULL;
-    low = extmembers;
-    high = extmembers + (numextmembers - 1);
-    while (low <= high)
-    {
-        ExtensionMemberId *middle;
-        int            difference;
-
-        middle = low + (high - low) / 2;
-        /* comparison must match ExtensionMemberIdCompare, below */
-        difference = oidcmp(middle->catId.oid, catalogId.oid);
-        if (difference == 0)
-            difference = oidcmp(middle->catId.tableoid, catalogId.tableoid);
-        if (difference == 0)
-            return middle->ext;
-        else if (difference < 0)
-            low = middle + 1;
-        else
-            high = middle - 1;
-    }
-    return NULL;
-}
-
-/*
- * qsort comparator for ExtensionMemberIds
- */
-static int
-ExtensionMemberIdCompare(const void *p1, const void *p2)
-{
-    const ExtensionMemberId *obj1 = (const ExtensionMemberId *) p1;
-    const ExtensionMemberId *obj2 = (const ExtensionMemberId *) p2;
-    int            cmpval;
+    if (catalogIdHash == NULL)
+        return NULL;            /* no objects exist yet */

-    /*
-     * Compare OID first since it's usually unique, whereas there will only be
-     * a few distinct values of tableoid.
-     */
-    cmpval = oidcmp(obj1->catId.oid, obj2->catId.oid);
-    if (cmpval == 0)
-        cmpval = oidcmp(obj1->catId.tableoid, obj2->catId.tableoid);
-    return cmpval;
+    entry = catalogid_lookup(catalogIdHash, catalogId);
+    if (entry == NULL)
+        return NULL;
+    return entry->ext;
 }


diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index 3c1cd858a8..6af10a85a2 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -236,6 +236,7 @@ typedef struct Archive

 typedef struct
 {
+    /* Note: this struct must not contain any unused bytes */
     Oid            tableoid;
     Oid            oid;
 } CatalogId;
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index ed8ed2f266..948615a5b9 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -17901,12 +17901,10 @@ getExtensionMembership(Archive *fout, ExtensionInfo extinfo[],
     PQExpBuffer query;
     PGresult   *res;
     int            ntups,
-                nextmembers,
                 i;
     int            i_classid,
                 i_objid,
                 i_refobjid;
-    ExtensionMemberId *extmembers;
     ExtensionInfo *ext;

     /* Nothing to do if no extensions */
@@ -17931,12 +17929,7 @@ getExtensionMembership(Archive *fout, ExtensionInfo extinfo[],
     i_objid = PQfnumber(res, "objid");
     i_refobjid = PQfnumber(res, "refobjid");

-    extmembers = (ExtensionMemberId *) pg_malloc(ntups * sizeof(ExtensionMemberId));
-    nextmembers = 0;
-
     /*
-     * Accumulate data into extmembers[].
-     *
      * Since we ordered the SELECT by referenced ID, we can expect that
      * multiple entries for the same extension will appear together; this
      * saves on searches.
@@ -17963,16 +17956,11 @@ getExtensionMembership(Archive *fout, ExtensionInfo extinfo[],
             continue;
         }

-        extmembers[nextmembers].catId = objId;
-        extmembers[nextmembers].ext = ext;
-        nextmembers++;
+        recordExtensionMembership(objId, ext);
     }

     PQclear(res);

-    /* Remember the data for use later */
-    setExtensionMembership(extmembers, nextmembers);
-
     destroyPQExpBuffer(query);
 }

diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 29af845ece..cc55e598ec 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -647,16 +647,6 @@ typedef struct _SubscriptionInfo
     char       *subpublications;
 } SubscriptionInfo;

-/*
- * We build an array of these with an entry for each object that is an
- * extension member according to pg_depend.
- */
-typedef struct _extensionMemberId
-{
-    CatalogId    catId;            /* tableoid+oid of some member object */
-    ExtensionInfo *ext;            /* owning extension */
-} ExtensionMemberId;
-
 /*
  *    common utility functions
  */
@@ -682,7 +672,7 @@ extern NamespaceInfo *findNamespaceByOid(Oid oid);
 extern ExtensionInfo *findExtensionByOid(Oid oid);
 extern PublicationInfo *findPublicationByOid(Oid oid);

-extern void setExtensionMembership(ExtensionMemberId *extmems, int nextmems);
+extern void recordExtensionMembership(CatalogId catId, ExtensionInfo *ext);
 extern ExtensionInfo *findOwningExtension(CatalogId catalogId);

 extern void parseOidArray(const char *str, Oid *array, int arraysize);
pgsql-hackers by date:
From: "Bossart, Nathan"
Date: 21 October 2021, 22:23:20
Subject: Re: Fixing WAL instability in various TAP tests
From: "Bossart, Nathan"
Date: 21 October 2021, 23:04:34
Subject: Re: CREATEROLE and role ownership hierarchies
Experimenting with hash tables inside pg_dump - Mailing list pgsql-hackers

Previous

Next