Re: check for interrupts in set_rtable_names - Mailing list pgsql-hackers

From Tom Lane
Subject Re: check for interrupts in set_rtable_names
Date
Msg-id 16882.1447632852@sss.pgh.pa.us
Whole thread Raw
In response to Re: check for interrupts in set_rtable_names  (Jeff Janes <jeff.janes@gmail.com>)
Responses Re: check for interrupts in set_rtable_names  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-hackers
Jeff Janes <jeff.janes@gmail.com> writes:
> On Fri, Nov 13, 2015 at 3:13 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
>> There's probably no reason not to do that, but I'd be much more interested
>> in eliminating the slowness to begin with ...

> I was thinking about that as well, but I don't think that would be
> back-patchable, at least not the way I was envisioning it.

> I was thinking of detecting bad cases (had to count to over 10 before
> finding a novel name, more than 10 times) and then switching from an
> object-local count, to a global count, for the numbers to add to the
> name.  But that would be a behavior change under some conditions.

I experimented with using a hash table to avoid the O(N^2) behavior.
This seems to work quite well, and I think it doesn't change the results
(leastwise, it does not break the regression tests).

It would be possible to do something similar to dodge the O(N^2) costs
in make_colname_unique/colname_is_unique; but it would be a lot messier,
and the tests I did suggest that it's fairly hard to get into a regime
where those functions are a huge part of the runtime anyway, because
we have limits on the numbers of columns.  So I'm inclined to leave that
alone.

            regards, tom lane

diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 3388f7c..2b6de8f 100644
*** a/src/backend/utils/adt/ruleutils.c
--- b/src/backend/utils/adt/ruleutils.c
***************
*** 55,60 ****
--- 55,61 ----
  #include "utils/array.h"
  #include "utils/builtins.h"
  #include "utils/fmgroids.h"
+ #include "utils/hsearch.h"
  #include "utils/lsyscache.h"
  #include "utils/rel.h"
  #include "utils/ruleutils.h"
*************** typedef struct
*** 267,272 ****
--- 268,282 ----
  #define deparse_columns_fetch(rangetable_index, dpns) \
      ((deparse_columns *) list_nth((dpns)->rtable_columns, (rangetable_index)-1))

+ /*
+  * Entry in set_rtable_names' hash table
+  */
+ typedef struct
+ {
+     char        name[NAMEDATALEN];        /* Hash key --- must be first */
+     int            counter;        /* Largest addition used so far for name */
+ } NameHashEntry;
+

  /* ----------
   * Global data
*************** static void print_function_rettype(Strin
*** 312,319 ****
  static void print_function_trftypes(StringInfo buf, HeapTuple proctup);
  static void set_rtable_names(deparse_namespace *dpns, List *parent_namespaces,
                   Bitmapset *rels_used);
- static bool refname_is_unique(char *refname, deparse_namespace *dpns,
-                   List *parent_namespaces);
  static void set_deparse_for_query(deparse_namespace *dpns, Query *query,
                        List *parent_namespaces);
  static void set_simple_column_names(deparse_namespace *dpns);
--- 322,327 ----
*************** static void
*** 2676,2690 ****
  set_rtable_names(deparse_namespace *dpns, List *parent_namespaces,
                   Bitmapset *rels_used)
  {
      ListCell   *lc;
-     int            rtindex = 1;

      dpns->rtable_names = NIL;
      foreach(lc, dpns->rtable)
      {
          RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
          char       *refname;

          if (rels_used && !bms_is_member(rtindex, rels_used))
          {
              /* Ignore unreferenced RTE */
--- 2684,2744 ----
  set_rtable_names(deparse_namespace *dpns, List *parent_namespaces,
                   Bitmapset *rels_used)
  {
+     HASHCTL        hash_ctl;
+     HTAB       *names_hash;
+     NameHashEntry *hentry;
+     bool        found;
+     int            rtindex;
      ListCell   *lc;

      dpns->rtable_names = NIL;
+     /* nothing more to do if empty rtable */
+     if (dpns->rtable == NIL)
+         return;
+
+     /*
+      * We use a hash table to hold known names, so that this process is O(N)
+      * not O(N^2) for N names.
+      */
+     MemSet(&hash_ctl, 0, sizeof(hash_ctl));
+     hash_ctl.keysize = NAMEDATALEN;
+     hash_ctl.entrysize = sizeof(NameHashEntry);
+     hash_ctl.hcxt = CurrentMemoryContext;
+     names_hash = hash_create("set_rtable_names names",
+                              list_length(dpns->rtable),
+                              &hash_ctl,
+                              HASH_ELEM | HASH_CONTEXT);
+     /* Preload the hash table with names appearing in parent_namespaces */
+     foreach(lc, parent_namespaces)
+     {
+         deparse_namespace *olddpns = (deparse_namespace *) lfirst(lc);
+         ListCell   *lc2;
+
+         foreach(lc2, olddpns->rtable_names)
+         {
+             char       *oldname = (char *) lfirst(lc2);
+
+             if (oldname == NULL)
+                 continue;
+             hentry = (NameHashEntry *) hash_search(names_hash,
+                                                    oldname,
+                                                    HASH_ENTER,
+                                                    &found);
+             /* we do not complain about duplicate names in parent namespaces */
+             hentry->counter = 0;
+         }
+     }
+
+     /* Now we can scan the rtable */
+     rtindex = 1;
      foreach(lc, dpns->rtable)
      {
          RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
          char       *refname;

+         /* Just in case this takes an unreasonable amount of time ... */
+         CHECK_FOR_INTERRUPTS();
+
          if (rels_used && !bms_is_member(rtindex, rels_used))
          {
              /* Ignore unreferenced RTE */
*************** set_rtable_names(deparse_namespace *dpns
*** 2712,2767 ****
          }

          /*
!          * If the selected name isn't unique, append digits to make it so
           */
!         if (refname &&
!             !refname_is_unique(refname, dpns, parent_namespaces))
          {
!             char       *modname = (char *) palloc(strlen(refname) + 32);
!             int            i = 0;

!             do
              {
!                 sprintf(modname, "%s_%d", refname, ++i);
!             } while (!refname_is_unique(modname, dpns, parent_namespaces));
!             refname = modname;
          }

          dpns->rtable_names = lappend(dpns->rtable_names, refname);
          rtindex++;
      }
- }
-
- /*
-  * refname_is_unique: is refname distinct from all already-chosen RTE names?
-  */
- static bool
- refname_is_unique(char *refname, deparse_namespace *dpns,
-                   List *parent_namespaces)
- {
-     ListCell   *lc;
-
-     foreach(lc, dpns->rtable_names)
-     {
-         char       *oldname = (char *) lfirst(lc);
-
-         if (oldname && strcmp(oldname, refname) == 0)
-             return false;
-     }
-     foreach(lc, parent_namespaces)
-     {
-         deparse_namespace *olddpns = (deparse_namespace *) lfirst(lc);
-         ListCell   *lc2;
-
-         foreach(lc2, olddpns->rtable_names)
-         {
-             char       *oldname = (char *) lfirst(lc2);

!             if (oldname && strcmp(oldname, refname) == 0)
!                 return false;
!         }
!     }
!     return true;
  }

  /*
--- 2766,2809 ----
          }

          /*
!          * If the selected name isn't unique, append digits to make it so, and
!          * make a new hash entry for it once we've got a unique name.
           */
!         if (refname)
          {
!             hentry = (NameHashEntry *) hash_search(names_hash,
!                                                    refname,
!                                                    HASH_ENTER,
!                                                    &found);
!             if (found)
!             {
!                 /* Name already in use, must choose a new one */
!                 char       *modname = (char *) palloc(strlen(refname) + 32);
!                 NameHashEntry *hentry2;

!                 do
!                 {
!                     sprintf(modname, "%s_%d", refname, ++(hentry->counter));
!                     hentry2 = (NameHashEntry *) hash_search(names_hash,
!                                                             modname,
!                                                             HASH_ENTER,
!                                                             &found);
!                 } while (found);
!                 hentry2->counter = 0;    /* init new hash entry */
!                 refname = modname;
!             }
!             else
              {
!                 /* Name not previously used, need only initialize hentry */
!                 hentry->counter = 0;
!             }
          }

          dpns->rtable_names = lappend(dpns->rtable_names, refname);
          rtindex++;
      }

!     hash_destroy(names_hash);
  }

  /*

pgsql-hackers by date:

Previous
From: Gavin Flower
Date:
Subject: Re: Parallel Seq Scan
Next
From: "andres@anarazel.de"
Date:
Subject: Re: [PATCH] Refactoring of LWLock tranches