Re: Load TIME fields - proposed performance improvement - Mailing list pgsql-hackers

From Tom Lane
Subject Re: Load TIME fields - proposed performance improvement
Date
Msg-id 1906375.1601050625@sss.pgh.pa.us
Whole thread Raw
In response to Re: Load TIME fields - proposed performance improvement  (Peter Smith <smithpb2250@gmail.com>)
Responses Re: Load TIME fields - proposed performance improvement  (Peter Smith <smithpb2250@gmail.com>)
List pgsql-hackers
Peter Smith <smithpb2250@gmail.com> writes:
> The patch has been re-implemented based on previous advice.
> Please see attached.

Hm, so:

1. The caching behavior really needs to account for the possibility of
the timezone setting being changed intra-transaction.  That's not very
likely, perhaps, but we can't deliver a wrong answer.  It seems best
to make the dependency on session_timezone explicit instead of
allowing it to be hidden inside timestamp2tm.

2. I'd had in mind just one cache, not several.  Admittedly it
doesn't gain that much for different code paths to share the result,
but it seems silly not to, especially given the relative complexity
of getting the caching right.

That led me to refactor the patch as attached.  (I'd first thought
that we could just leave the j2date calculation in GetSQLCurrentDate
out of the cache, but performance measurements showed that it is
worthwhile to cache that.  An advantage of the way it's done here
is we probably won't have to redo j2date even across transactions.)

> (represents 19% improvement for this worst case table/data)

I'm getting a hair under 30% improvement on this admittedly
artificial test case, for either your patch or mine.

> Note: I patched the function GetCurrentTimeUsec consistently with the
> others, but actually I was not able to discover any SQL syntax which
> could cause that function to be invoked multiple times. Perhaps the
> patch for that function should be removed?

The existing callers for that are concerned with interpreting "now"
in datetime input strings.  It's certainly possible to have to do that
more than once per transaction --- an example would be COPY IN where
a lot of rows contain "now" as the value of a datetime column.

            regards, tom lane

diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c
index eaaffa7137..057051fa85 100644
--- a/src/backend/utils/adt/date.c
+++ b/src/backend/utils/adt/date.c
@@ -299,20 +299,31 @@ EncodeSpecialDate(DateADT dt, char *str)
 DateADT
 GetSQLCurrentDate(void)
 {
-    TimestampTz ts;
-    struct pg_tm tt,
-               *tm = &tt;
-    fsec_t        fsec;
-    int            tz;
+    struct pg_tm tm;

-    ts = GetCurrentTransactionStartTimestamp();
+    static int    cache_year = 0;
+    static int    cache_mon = 0;
+    static int    cache_mday = 0;
+    static DateADT cache_date;

-    if (timestamp2tm(ts, &tz, tm, &fsec, NULL, NULL) != 0)
-        ereport(ERROR,
-                (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
-                 errmsg("timestamp out of range")));
+    GetCurrentDateTime(&tm);

-    return date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE;
+    /*
+     * date2j involves several integer divisions; moreover, unless our session
+     * lives across local midnight, we don't really have to do it more than
+     * once.  So it seems worth having a separate cache here.
+     */
+    if (tm.tm_year != cache_year ||
+        tm.tm_mon != cache_mon ||
+        tm.tm_mday != cache_mday)
+    {
+        cache_date = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE;
+        cache_year = tm.tm_year;
+        cache_mon = tm.tm_mon;
+        cache_mday = tm.tm_mday;
+    }
+
+    return cache_date;
 }

 /*
@@ -322,18 +333,12 @@ TimeTzADT *
 GetSQLCurrentTime(int32 typmod)
 {
     TimeTzADT  *result;
-    TimestampTz ts;
     struct pg_tm tt,
                *tm = &tt;
     fsec_t        fsec;
     int            tz;

-    ts = GetCurrentTransactionStartTimestamp();
-
-    if (timestamp2tm(ts, &tz, tm, &fsec, NULL, NULL) != 0)
-        ereport(ERROR,
-                (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
-                 errmsg("timestamp out of range")));
+    GetCurrentTimeUsec(tm, &fsec, &tz);

     result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
     tm2timetz(tm, fsec, tz, result);
@@ -348,18 +353,12 @@ TimeADT
 GetSQLLocalTime(int32 typmod)
 {
     TimeADT        result;
-    TimestampTz ts;
     struct pg_tm tt,
                *tm = &tt;
     fsec_t        fsec;
     int            tz;

-    ts = GetCurrentTransactionStartTimestamp();
-
-    if (timestamp2tm(ts, &tz, tm, &fsec, NULL, NULL) != 0)
-        ereport(ERROR,
-                (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
-                 errmsg("timestamp out of range")));
+    GetCurrentTimeUsec(tm, &fsec, &tz);

     tm2time(tm, fsec, &result);
     AdjustTimeForTypmod(&result, typmod);
diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index dec2fad82a..bc682584f0 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -339,35 +339,78 @@ j2day(int date)
 /*
  * GetCurrentDateTime()
  *
- * Get the transaction start time ("now()") broken down as a struct pg_tm.
+ * Get the transaction start time ("now()") broken down as a struct pg_tm,
+ * according to the session timezone setting.
  */
 void
 GetCurrentDateTime(struct pg_tm *tm)
 {
-    int            tz;
     fsec_t        fsec;

-    timestamp2tm(GetCurrentTransactionStartTimestamp(), &tz, tm, &fsec,
-                 NULL, NULL);
-    /* Note: don't pass NULL tzp to timestamp2tm; affects behavior */
+    /* This is just a convenience wrapper for GetCurrentTimeUsec */
+    GetCurrentTimeUsec(tm, &fsec, NULL);
 }

 /*
  * GetCurrentTimeUsec()
  *
  * Get the transaction start time ("now()") broken down as a struct pg_tm,
- * including fractional seconds and timezone offset.
+ * including fractional seconds and timezone offset.  The time is converted
+ * according to the session timezone setting.
+ *
+ * Callers may pass tzp = NULL if they don't need the offset, but this does
+ * not affect the conversion behavior (unlike timestamp2tm()).
+ *
+ * Internally, we cache the result, since this could be called many times
+ * in a transaction, within which now() doesn't change.
  */
 void
 GetCurrentTimeUsec(struct pg_tm *tm, fsec_t *fsec, int *tzp)
 {
-    int            tz;
+    TimestampTz cur_ts = GetCurrentTransactionStartTimestamp();
+
+    /*
+     * The cache key must include both current time and current timezone.  By
+     * representing the timezone by just a pointer, we're assuming that
+     * distinct timezone settings could never have the same pointer value.
+     * This is true by virtue of the hashtable used inside pg_tzset();
+     * however, it might need another look if we ever allow entries in that
+     * hash to be recycled.
+     */
+    static TimestampTz cache_ts = 0;
+    static pg_tz *cache_timezone = NULL;
+    static struct pg_tm cache_tm;
+    static fsec_t cache_fsec;
+    static int    cache_tz;
+
+    if (cur_ts != cache_ts || session_timezone != cache_timezone)
+    {
+        /*
+         * Make sure cache is marked invalid in case of error after partial
+         * update within timestamp2tm.
+         */
+        cache_timezone = NULL;
+
+        /*
+         * Perform the computation, storing results into cache.  We do not
+         * really expect any error here, since current time surely ought to be
+         * within range, but check just for sanity's sake.
+         */
+        if (timestamp2tm(cur_ts, &cache_tz, &cache_tm, &cache_fsec,
+                         NULL, session_timezone) != 0)
+            ereport(ERROR,
+                    (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+                     errmsg("timestamp out of range")));
+
+        /* OK, so mark the cache valid. */
+        cache_ts = cur_ts;
+        cache_timezone = session_timezone;
+    }

-    timestamp2tm(GetCurrentTransactionStartTimestamp(), &tz, tm, fsec,
-                 NULL, NULL);
-    /* Note: don't pass NULL tzp to timestamp2tm; affects behavior */
+    *tm = cache_tm;
+    *fsec = cache_fsec;
     if (tzp != NULL)
-        *tzp = tz;
+        *tzp = cache_tz;
 }



pgsql-hackers by date:

Previous
From: Li Japin
Date:
Subject: Optimize memory allocation code
Next
From: Andres Freund
Date:
Subject: Re: gs_group_1 crashing on 13beta2/s390x