Re: Load TIME fields - proposed performance improvement - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Re: Load TIME fields - proposed performance improvement |
Date | |
Msg-id | 1906375.1601050625@sss.pgh.pa.us Whole thread Raw |
In response to | Re: Load TIME fields - proposed performance improvement (Peter Smith <smithpb2250@gmail.com>) |
Responses |
Re: Load TIME fields - proposed performance improvement
|
List | pgsql-hackers |
Peter Smith <smithpb2250@gmail.com> writes: > The patch has been re-implemented based on previous advice. > Please see attached. Hm, so: 1. The caching behavior really needs to account for the possibility of the timezone setting being changed intra-transaction. That's not very likely, perhaps, but we can't deliver a wrong answer. It seems best to make the dependency on session_timezone explicit instead of allowing it to be hidden inside timestamp2tm. 2. I'd had in mind just one cache, not several. Admittedly it doesn't gain that much for different code paths to share the result, but it seems silly not to, especially given the relative complexity of getting the caching right. That led me to refactor the patch as attached. (I'd first thought that we could just leave the j2date calculation in GetSQLCurrentDate out of the cache, but performance measurements showed that it is worthwhile to cache that. An advantage of the way it's done here is we probably won't have to redo j2date even across transactions.) > (represents 19% improvement for this worst case table/data) I'm getting a hair under 30% improvement on this admittedly artificial test case, for either your patch or mine. > Note: I patched the function GetCurrentTimeUsec consistently with the > others, but actually I was not able to discover any SQL syntax which > could cause that function to be invoked multiple times. Perhaps the > patch for that function should be removed? The existing callers for that are concerned with interpreting "now" in datetime input strings. It's certainly possible to have to do that more than once per transaction --- an example would be COPY IN where a lot of rows contain "now" as the value of a datetime column. regards, tom lane diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c index eaaffa7137..057051fa85 100644 --- a/src/backend/utils/adt/date.c +++ b/src/backend/utils/adt/date.c @@ -299,20 +299,31 @@ EncodeSpecialDate(DateADT dt, char *str) DateADT GetSQLCurrentDate(void) { - TimestampTz ts; - struct pg_tm tt, - *tm = &tt; - fsec_t fsec; - int tz; + struct pg_tm tm; - ts = GetCurrentTransactionStartTimestamp(); + static int cache_year = 0; + static int cache_mon = 0; + static int cache_mday = 0; + static DateADT cache_date; - if (timestamp2tm(ts, &tz, tm, &fsec, NULL, NULL) != 0) - ereport(ERROR, - (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), - errmsg("timestamp out of range"))); + GetCurrentDateTime(&tm); - return date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE; + /* + * date2j involves several integer divisions; moreover, unless our session + * lives across local midnight, we don't really have to do it more than + * once. So it seems worth having a separate cache here. + */ + if (tm.tm_year != cache_year || + tm.tm_mon != cache_mon || + tm.tm_mday != cache_mday) + { + cache_date = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE; + cache_year = tm.tm_year; + cache_mon = tm.tm_mon; + cache_mday = tm.tm_mday; + } + + return cache_date; } /* @@ -322,18 +333,12 @@ TimeTzADT * GetSQLCurrentTime(int32 typmod) { TimeTzADT *result; - TimestampTz ts; struct pg_tm tt, *tm = &tt; fsec_t fsec; int tz; - ts = GetCurrentTransactionStartTimestamp(); - - if (timestamp2tm(ts, &tz, tm, &fsec, NULL, NULL) != 0) - ereport(ERROR, - (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), - errmsg("timestamp out of range"))); + GetCurrentTimeUsec(tm, &fsec, &tz); result = (TimeTzADT *) palloc(sizeof(TimeTzADT)); tm2timetz(tm, fsec, tz, result); @@ -348,18 +353,12 @@ TimeADT GetSQLLocalTime(int32 typmod) { TimeADT result; - TimestampTz ts; struct pg_tm tt, *tm = &tt; fsec_t fsec; int tz; - ts = GetCurrentTransactionStartTimestamp(); - - if (timestamp2tm(ts, &tz, tm, &fsec, NULL, NULL) != 0) - ereport(ERROR, - (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), - errmsg("timestamp out of range"))); + GetCurrentTimeUsec(tm, &fsec, &tz); tm2time(tm, fsec, &result); AdjustTimeForTypmod(&result, typmod); diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c index dec2fad82a..bc682584f0 100644 --- a/src/backend/utils/adt/datetime.c +++ b/src/backend/utils/adt/datetime.c @@ -339,35 +339,78 @@ j2day(int date) /* * GetCurrentDateTime() * - * Get the transaction start time ("now()") broken down as a struct pg_tm. + * Get the transaction start time ("now()") broken down as a struct pg_tm, + * according to the session timezone setting. */ void GetCurrentDateTime(struct pg_tm *tm) { - int tz; fsec_t fsec; - timestamp2tm(GetCurrentTransactionStartTimestamp(), &tz, tm, &fsec, - NULL, NULL); - /* Note: don't pass NULL tzp to timestamp2tm; affects behavior */ + /* This is just a convenience wrapper for GetCurrentTimeUsec */ + GetCurrentTimeUsec(tm, &fsec, NULL); } /* * GetCurrentTimeUsec() * * Get the transaction start time ("now()") broken down as a struct pg_tm, - * including fractional seconds and timezone offset. + * including fractional seconds and timezone offset. The time is converted + * according to the session timezone setting. + * + * Callers may pass tzp = NULL if they don't need the offset, but this does + * not affect the conversion behavior (unlike timestamp2tm()). + * + * Internally, we cache the result, since this could be called many times + * in a transaction, within which now() doesn't change. */ void GetCurrentTimeUsec(struct pg_tm *tm, fsec_t *fsec, int *tzp) { - int tz; + TimestampTz cur_ts = GetCurrentTransactionStartTimestamp(); + + /* + * The cache key must include both current time and current timezone. By + * representing the timezone by just a pointer, we're assuming that + * distinct timezone settings could never have the same pointer value. + * This is true by virtue of the hashtable used inside pg_tzset(); + * however, it might need another look if we ever allow entries in that + * hash to be recycled. + */ + static TimestampTz cache_ts = 0; + static pg_tz *cache_timezone = NULL; + static struct pg_tm cache_tm; + static fsec_t cache_fsec; + static int cache_tz; + + if (cur_ts != cache_ts || session_timezone != cache_timezone) + { + /* + * Make sure cache is marked invalid in case of error after partial + * update within timestamp2tm. + */ + cache_timezone = NULL; + + /* + * Perform the computation, storing results into cache. We do not + * really expect any error here, since current time surely ought to be + * within range, but check just for sanity's sake. + */ + if (timestamp2tm(cur_ts, &cache_tz, &cache_tm, &cache_fsec, + NULL, session_timezone) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + /* OK, so mark the cache valid. */ + cache_ts = cur_ts; + cache_timezone = session_timezone; + } - timestamp2tm(GetCurrentTransactionStartTimestamp(), &tz, tm, fsec, - NULL, NULL); - /* Note: don't pass NULL tzp to timestamp2tm; affects behavior */ + *tm = cache_tm; + *fsec = cache_fsec; if (tzp != NULL) - *tzp = tz; + *tzp = cache_tz; }
pgsql-hackers by date: