[HACKERS] Faster pg_timezone_names view - Mailing list pgsql-hackers

From Tom Lane
Subject [HACKERS] Faster pg_timezone_names view
Date
Msg-id 27962.1493671706@sss.pgh.pa.us
Whole thread Raw
List pgsql-hackers
I've been casting about for low-hanging fruit for making the regression
tests run faster.  One thing I noticed is that this query in sysviews.sql
is one of the slowest queries in the whole suite:

select count(distinct utc_offset) >= 24 as ok from pg_timezone_names;

Reading pg_timezone_names takes upwards of 300 msec on my workstation,
and north of 3 seconds on some of my slower buildfarm critters.  I'd
always figured that, since it's reading the whole of the timezone data
directory tree, it's just naturally got to be slow.  However, I chanced
to try strace'ing a scan of pg_timezone_names, and what I saw was just
horrid.  We do something like this for *each* timezone data file:

stat("/usr/share/zoneinfo/posix/America/Iqaluit", {st_mode=S_IFREG|0644, st_size=2000, ...}) = 0
open("/usr/share/zoneinfo", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 20
getdents(20, /* 68 entries */, 32768)   = 1968
close(20)                               = 0
open("/usr/share/zoneinfo/posix", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 20
getdents(20, /* 62 entries */, 32768)   = 1776
close(20)                               = 0
open("/usr/share/zoneinfo/posix/America", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 20
getdents(20, /* 146 entries */, 32768)  = 4696
close(20)                               = 0
open("/usr/share/zoneinfo/posix/America/Iqaluit", O_RDONLY) = 20
read(20, "TZif2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\10\0\0\0\10\0\0\0\0"..., 54968) = 2000
close(20)                               = 0
open("/usr/share/zoneinfo", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 20
getdents(20, /* 68 entries */, 32768)   = 1968
close(20)                               = 0
open("/usr/share/zoneinfo/posixrules", O_RDONLY) = 20
read(20, "TZif2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\4\0\0\0\4\0\0\0\0"..., 54968) = 3519
close(20)                               = 0

That is, having found a data file by dint of searching the directory tree,
we *repeat the search* from the root of the timezone tree.  And then we
read the posixrules file, in addition to the target zoneinfo file itself.
And just to add insult to injury, we search the directory path down to
posixrules, though fortunately that's just one level down.

There are a couple of things going on here.  One is that pg_open_tzfile()
performs the repeat directory searches we're seeing above, because it is
tasked with accepting a case-insensitive spelling of a timezone name and
returning the correctly-cased zone name as seen in the file system.
That's necessary, and not too expensive, when performing something like
a "SET timezone" command, because we don't insist that the user spell
the zone name canonically in SET.  But it's pretty silly in the context
of a directory scan, because we already know the canonical spelling of
the file path: we just read it from the filesystem.

The second problem is that we don't cache the result of reading
posixrules, even though we need it for practically every zone load.
This seems like a performance bug in the upstream IANA library,
though I'm not sure that they care about use-cases like this one.
(They might also object that they don't want to cache data that
could conceivably change on-disk; but we already cache timezone
data at other levels, so there's no reason not to do it here.)

The attached patch adjusts both of these things.  On my workstation
it reduces the runtime of the pg_timezone_names view by better than
a factor of 3.

If I were to commit this, I'd want to back-patch, because experience
has shown that we don't want any cross-branch variation in the timezone
code files; absorbing upstream fixes is painful enough without that.
But it seems like a pretty safe and helpful thing to back-patch.

Thoughts, objections?

            regards, tom lane

diff --git a/src/timezone/localtime.c b/src/timezone/localtime.c
index 154124e..a2faa82 100644
*** a/src/timezone/localtime.c
--- b/src/timezone/localtime.c
*************** static const pg_time_t time_t_min = MINV
*** 55,60 ****
--- 55,67 ----
  static const pg_time_t time_t_max = MAXVAL(pg_time_t, TYPE_BIT(pg_time_t));

  /*
+  * We cache the result of trying to load the TZDEFRULES zone here.
+  * tzdefrules_loaded is 0 if not tried yet, +1 if good, -1 if failed.
+  */
+ static struct state tzdefrules_s;
+ static int    tzdefrules_loaded = 0;
+
+ /*
   * The DST rules to use if TZ has no rules and we can't load TZDEFRULES.
   * We default to US rules as of 1999-08-17.
   * POSIX 1003.1 section 8.1.1 says that the default DST rules are
*************** tzparse(const char *name, struct state *
*** 942,948 ****
          charcnt = stdlen + 1;
          if (sizeof sp->chars < charcnt)
              return false;
!         load_ok = tzload(TZDEFRULES, NULL, sp, false) == 0;
      }
      if (!load_ok)
          sp->leapcnt = 0;        /* so, we're off a little */
--- 949,969 ----
          charcnt = stdlen + 1;
          if (sizeof sp->chars < charcnt)
              return false;
!
!         /*
!          * This bit also differs from the IANA code, which doesn't make any
!          * attempt to avoid repetitive loadings of the TZDEFRULES zone.
!          */
!         if (tzdefrules_loaded == 0)
!         {
!             if (tzload(TZDEFRULES, NULL, &tzdefrules_s, false) == 0)
!                 tzdefrules_loaded = 1;
!             else
!                 tzdefrules_loaded = -1;
!         }
!         load_ok = (tzdefrules_loaded > 0);
!         if (load_ok)
!             memcpy(sp, &tzdefrules_s, sizeof(struct state));
      }
      if (!load_ok)
          sp->leapcnt = 0;        /* so, we're off a little */
diff --git a/src/timezone/pgtz.c b/src/timezone/pgtz.c
index 13ce399..31dbf89 100644
*** a/src/timezone/pgtz.c
--- b/src/timezone/pgtz.c
*************** pg_open_tzfile(const char *name, char *c
*** 80,91 ****
      int            fullnamelen;
      int            orignamelen;

      /*
       * Loop to split the given name into directory levels; for each level,
       * search using scan_directory_ci().
       */
-     strlcpy(fullname, pg_TZDIR(), sizeof(fullname));
-     orignamelen = fullnamelen = strlen(fullname);
      fname = name;
      for (;;)
      {
--- 80,116 ----
      int            fullnamelen;
      int            orignamelen;

+     /* Initialize fullname with base name of tzdata directory */
+     strlcpy(fullname, pg_TZDIR(), sizeof(fullname));
+     orignamelen = fullnamelen = strlen(fullname);
+
+     if (fullnamelen + 1 + strlen(name) >= MAXPGPATH)
+         return -1;                /* not gonna fit */
+
+     /*
+      * If the caller doesn't need the canonical spelling, first just try to
+      * open the name as-is.  This can be expected to succeed if the given name
+      * is already case-correct, or if the filesystem is case-insensitive; and
+      * we don't need to distinguish those situations if we aren't tasked with
+      * reporting the canonical spelling.
+      */
+     if (canonname == NULL)
+     {
+         int            result;
+
+         fullname[fullnamelen] = '/';
+         /* test above ensured this will fit: */
+         strcpy(fullname + fullnamelen + 1, name);
+         result = open(fullname, O_RDONLY | PG_BINARY, 0);
+         if (result >= 0)
+             return result;
+         /* If that didn't work, fall through to do it the hard way */
+     }
+
      /*
       * Loop to split the given name into directory levels; for each level,
       * search using scan_directory_ci().
       */
      fname = name;
      for (;;)
      {
*************** pg_open_tzfile(const char *name, char *c
*** 97,104 ****
              fnamelen = slashptr - fname;
          else
              fnamelen = strlen(fname);
-         if (fullnamelen + 1 + fnamelen >= MAXPGPATH)
-             return -1;            /* not gonna fit */
          if (!scan_directory_ci(fullname, fname, fnamelen,
                                 fullname + fullnamelen + 1,
                                 MAXPGPATH - fullnamelen - 1))
--- 122,127 ----
*************** pg_tzenumerate_next(pg_tzenum *dir)
*** 458,467 ****

          /*
           * Load this timezone using tzload() not pg_tzset(), so we don't fill
!          * the cache
           */
!         if (tzload(fullname + dir->baselen, dir->tz.TZname, &dir->tz.state,
!                    true) != 0)
          {
              /* Zone could not be loaded, ignore it */
              continue;
--- 481,491 ----

          /*
           * Load this timezone using tzload() not pg_tzset(), so we don't fill
!          * the cache.  Also, don't ask for the canonical spelling: we already
!          * know it, and pg_open_tzfile's way of finding it out is pretty
!          * inefficient.
           */
!         if (tzload(fullname + dir->baselen, NULL, &dir->tz.state, true) != 0)
          {
              /* Zone could not be loaded, ignore it */
              continue;
*************** pg_tzenumerate_next(pg_tzenum *dir)
*** 473,478 ****
--- 497,506 ----
              continue;
          }

+         /* OK, return the canonical zone name spelling. */
+         strlcpy(dir->tz.TZname, fullname + dir->baselen,
+                 sizeof(dir->tz.TZname));
+
          /* Timezone loaded OK. */
          return &dir->tz;
      }

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

pgsql-hackers by date:

Previous
From: Peter Geoghegan
Date:
Subject: Re: [HACKERS] A design for amcheck heapam verification
Next
From: Peter Geoghegan
Date:
Subject: Re: [HACKERS] A design for amcheck heapam verification