From b295773ecb47ef58c067448cedc09b4537ee2d13 Mon Sep 17 00:00:00 2001 From: Lukas Fittl Date: Fri, 25 Jul 2025 17:57:20 -0700 Subject: [PATCH v7 2/4] Timing: Streamline ticks to nanosecond conversion across platforms The timing infrastructure (INSTR_* macros) measures time elapsed using clock_gettime() on POSIX systems, which returns the time as nanoseconds, and QueryPerformanceCounter() on Windows, which is a specialized timing clock source that returns a tick counter that needs to be converted to nanoseconds using the result of QueryPerformanceFrequency(). This conversion currently happens ad-hoc on Windows, e.g. when calling INSTR_TIME_GET_NANOSEC, which calls QueryPerformanceFrequency() on every invocation, despite the frequency being stable after program start, incurring unnecessary overhead. It also causes a fractured implementation where macros are defined differently between platforms. To ease code readability, and prepare for a future change that intends to use a ticks-to-nanosecond conversion on x86-64 for TSC use, introduce a new pg_ticks_to_ns() function that gets called on all platforms. This function relies on a separately initialized ticks_per_ns_scaled value, that represents the conversion ratio. This value is initialized from QueryPerformanceFrequency() on Windows, and set to zero on x86-64 POSIX systems, which results in the ticks being treated as nanoseconds. Other architectures always directly return the original ticks. To support this, pg_initialize_timing() is introduced, and is now mandatory for both the backend and any frontend programs to call before utilizing INSTR_* macros. In passing modify pg_test_timing to reduce the per-loop overhead caused by repeated divisions in INSTR_TIME_GET_NANOSEC when the ticks variable has become very large. Instead diff first and then turn it into nanosecs. 
Author: Lukas Fittl Author: Andres Freund Author: David Geier Reviewed-by: Discussion: https://www.postgresql.org/message-id/flat/20200612232810.f46nbqkdhbutzqdg%40alap3.anarazel.de --- src/backend/postmaster/postmaster.c | 3 + src/bin/pg_test_timing/pg_test_timing.c | 18 ++-- src/bin/pgbench/pgbench.c | 3 + src/bin/psql/startup.c | 4 + src/common/Makefile | 1 + src/common/instr_time.c | 85 +++++++++++++++++++ src/common/meson.build | 1 + src/include/portability/instr_time.h | 106 +++++++++++++++++------- 8 files changed, 181 insertions(+), 40 deletions(-) create mode 100644 src/common/instr_time.c diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index d6133bfebc6..0ee2e67a30a 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -588,6 +588,9 @@ PostmasterMain(int argc, char *argv[]) */ InitializeGUCOptions(); + /* initialize timing infrastructure (required for INSTR_* calls) */ + pg_initialize_timing(); + opterr = 1; /* diff --git a/src/bin/pg_test_timing/pg_test_timing.c b/src/bin/pg_test_timing/pg_test_timing.c index a5621251afc..9fd630a490a 100644 --- a/src/bin/pg_test_timing/pg_test_timing.c +++ b/src/bin/pg_test_timing/pg_test_timing.c @@ -43,6 +43,9 @@ main(int argc, char *argv[]) handle_args(argc, argv); + /* initialize timing infrastructure (required for INSTR_* calls) */ + pg_initialize_timing(); + loop_count = test_timing(test_duration); output(loop_count); @@ -155,11 +158,10 @@ test_timing(unsigned int duration) uint64 total_time; int64 time_elapsed = 0; uint64 loop_count = 0; - uint64 prev, - cur; instr_time start_time, end_time, - temp; + prev, + cur; /* * Pre-zero the statistics data structures. They're already zero by @@ -174,7 +176,7 @@ test_timing(unsigned int duration) total_time = duration > 0 ? 
duration * INT64CONST(1000000000) : 0; INSTR_TIME_SET_CURRENT(start_time); - cur = INSTR_TIME_GET_NANOSEC(start_time); + cur = start_time; while (time_elapsed < total_time) { @@ -182,9 +184,8 @@ test_timing(unsigned int duration) bits; prev = cur; - INSTR_TIME_SET_CURRENT(temp); - cur = INSTR_TIME_GET_NANOSEC(temp); - diff = cur - prev; + INSTR_TIME_SET_CURRENT(cur); + diff = INSTR_TIME_DIFF_NANOSEC(cur, prev); /* Did time go backwards? */ if (unlikely(diff < 0)) @@ -217,8 +218,7 @@ test_timing(unsigned int duration) largest_diff_count++; loop_count++; - INSTR_TIME_SUBTRACT(temp, start_time); - time_elapsed = INSTR_TIME_GET_NANOSEC(temp); + time_elapsed = INSTR_TIME_DIFF_NANOSEC(cur, start_time); } INSTR_TIME_SET_CURRENT(end_time); diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 58735871c17..16f7790680b 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -7334,6 +7334,9 @@ main(int argc, char **argv) initRandomState(&state[i].cs_func_rs); } + /* initialize timing infrastructure (required for INSTR_* calls) */ + pg_initialize_timing(); + /* opening connection... 
*/ con = doConnect(); if (con == NULL) diff --git a/src/bin/psql/startup.c b/src/bin/psql/startup.c index 9a397ec87b7..69d044d405d 100644 --- a/src/bin/psql/startup.c +++ b/src/bin/psql/startup.c @@ -24,6 +24,7 @@ #include "help.h" #include "input.h" #include "mainloop.h" +#include "portability/instr_time.h" #include "settings.h" /* @@ -327,6 +328,9 @@ main(int argc, char *argv[]) PQsetNoticeProcessor(pset.db, NoticeProcessor, NULL); + /* initialize timing infrastructure (required for INSTR_* calls) */ + pg_initialize_timing(); + SyncVariables(); if (options.list_dbs) diff --git a/src/common/Makefile b/src/common/Makefile index 2c720caa509..1a2fbbe887f 100644 --- a/src/common/Makefile +++ b/src/common/Makefile @@ -59,6 +59,7 @@ OBJS_COMMON = \ file_perm.o \ file_utils.o \ hashfn.o \ + instr_time.o \ ip.o \ jsonapi.o \ keywords.o \ diff --git a/src/common/instr_time.c b/src/common/instr_time.c new file mode 100644 index 00000000000..3e8ee2d6db2 --- /dev/null +++ b/src/common/instr_time.c @@ -0,0 +1,85 @@ +/*------------------------------------------------------------------------- + * + * instr_time.c + * Non-inline parts of the portable high-precision interval timing + * implementation + * + * Portions Copyright (c) 2026, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/common/instr_time.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "portability/instr_time.h" + +/* + * Stores what the number of ticks needs to be multiplied with to end up + * with nanoseconds using integer math. + * + * On certain platforms (currently Windows) the ticks to nanoseconds conversion + * requires floating point math because: + * + * sec = ticks / frequency_hz + * ns = ticks / frequency_hz * 1,000,000,000 + * ns = ticks * (1,000,000,000 / frequency_hz) + * ns = ticks * (1,000,000 / frequency_khz) <-- now in kilohertz + * + * Here, 'ns' is usually a floating number. 
For example for a 2.5 GHz CPU + * the scaling factor becomes 1,000,000 / 2,500,000 = 0.4. + * + * To be able to use integer math we work around the lack of precision. We + * first scale the integer up and after the multiplication by the number + * of ticks in INSTR_TIME_GET_NANOSEC() we divide again by the same value. + * We picked the scaler such that it provides enough precision and is a + * power-of-two which allows for shifting instead of doing an integer + * division. We utilize unsigned integers even though ticks are stored as a + * signed value because that encourages compilers to generate better assembly. + * + * On all other platforms we are using clock_gettime(), which uses nanoseconds + * as ticks. Hence, we set the multiplier to zero, which causes pg_ticks_to_ns + * to return the original value. + */ +uint64 ticks_per_ns_scaled = 0; +uint64 max_ticks_no_overflow = 0; + +static void set_ticks_per_ns(void); + +void +pg_initialize_timing() +{ + set_ticks_per_ns(); +} + +#ifndef WIN32 + +static void +set_ticks_per_ns() +{ + ticks_per_ns_scaled = 0; + max_ticks_no_overflow = 0; +} + +#else /* WIN32 */ + +/* GetTimerFrequency returns counts per second */ +static inline double +GetTimerFrequency(void) +{ + LARGE_INTEGER f; + + QueryPerformanceFrequency(&f); + return (double) f.QuadPart; +} + +static void +set_ticks_per_ns() +{ + ticks_per_ns_scaled = INT64CONST(1000000000) * TICKS_TO_NS_PRECISION / GetTimerFrequency(); + max_ticks_no_overflow = PG_INT64_MAX / ticks_per_ns_scaled; +} + +#endif /* WIN32 */ diff --git a/src/common/meson.build b/src/common/meson.build index b757618a9c9..042edb7473a 100644 --- a/src/common/meson.build +++ b/src/common/meson.build @@ -13,6 +13,7 @@ common_sources = files( 'file_perm.c', 'file_utils.c', 'hashfn.c', + 'instr_time.c', 'ip.c', 'jsonapi.c', 'keywords.c', diff --git a/src/include/portability/instr_time.h b/src/include/portability/instr_time.h index 490593d1825..f8145ae2af7 100644 --- a/src/include/portability/instr_time.h 
+++ b/src/include/portability/instr_time.h @@ -26,6 +26,8 @@ * * INSTR_TIME_SUBTRACT(x, y) x -= y * + * INSTR_TIME_DIFF_NANOSEC(x, y) x - y (in nanoseconds) + * * INSTR_TIME_ACCUM_DIFF(x, y, z) x += (y - z) * * INSTR_TIME_GET_DOUBLE(t) convert t to double (in seconds) @@ -78,11 +80,29 @@ typedef struct instr_time #define NS_PER_MS INT64CONST(1000000) #define NS_PER_US INT64CONST(1000) +/* + * Make sure this is a power-of-two, so that the compiler can turn the + * multiplications and divisions into shifts. + */ +#define TICKS_TO_NS_PRECISION (1<<14) -#ifndef WIN32 +/* + * Variables used to translate ticks to nanoseconds, initialized by + * pg_initialize_timing. + */ +extern PGDLLIMPORT uint64 ticks_per_ns_scaled; +extern PGDLLIMPORT uint64 max_ticks_no_overflow; +/* + * Initialize timing infrastructure + * + * This must be called at least once before using INSTR_TIME_SET_CURRENT* macros. + */ +extern void pg_initialize_timing(void); -/* Use clock_gettime() */ +#ifndef WIN32 + +/* On POSIX, use clock_gettime() for system clock source */ #include @@ -106,9 +126,8 @@ typedef struct instr_time #define PG_INSTR_CLOCK CLOCK_REALTIME #endif -/* helper for INSTR_TIME_SET_CURRENT */ static inline instr_time -pg_clock_gettime_ns(void) +pg_get_ticks(void) { instr_time now; struct timespec tmp; @@ -119,21 +138,12 @@ pg_clock_gettime_ns(void) return now; } -#define INSTR_TIME_SET_CURRENT(t) \ - ((t) = pg_clock_gettime_ns()) - -#define INSTR_TIME_GET_NANOSEC(t) \ - ((int64) (t).ticks) - - #else /* WIN32 */ +/* On Windows, use QueryPerformanceCounter() for system clock source */ -/* Use QueryPerformanceCounter() */ - -/* helper for INSTR_TIME_SET_CURRENT */ static inline instr_time -pg_query_performance_counter(void) +pg_get_ticks(void) { instr_time now; LARGE_INTEGER tmp; @@ -144,23 +154,50 @@ pg_query_performance_counter(void) return now; } -static inline double -GetTimerFrequency(void) -{ - LARGE_INTEGER f; - - QueryPerformanceFrequency(&f); - return (double) f.QuadPart; -} - 
-#define INSTR_TIME_SET_CURRENT(t) \ - ((t) = pg_query_performance_counter()) - -#define INSTR_TIME_GET_NANOSEC(t) \ - ((int64) ((t).ticks * ((double) NS_PER_S / GetTimerFrequency()))) - #endif /* WIN32 */ +static inline int64 +pg_ticks_to_ns(int64 ticks) +{ +#if defined(__x86_64__) || defined(WIN32) + int64 ns = 0; + + if (ticks_per_ns_scaled == 0) + return ticks; + + /* + * Would multiplication overflow? If so perform computation in two parts. + * Check overflow without actually overflowing via: a * b > max <=> a > + * max / b + */ + if (unlikely(ticks > (int64) max_ticks_no_overflow)) + { + /* + * Compute how often the maximum number of ticks fits completely into + * the number of elapsed ticks and convert that number into + * nanoseconds. Then multiply by the count to arrive at the final + * value. In a 2nd step we adjust the number of elapsed ticks and + * convert the remaining ticks. + */ + int64 count = ticks / max_ticks_no_overflow; + int64 max_ns = max_ticks_no_overflow * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION; + + ns = max_ns * count; + + /* + * Subtract the ticks that we now already accounted for, so that they + * don't get counted twice. + */ + ticks -= count * max_ticks_no_overflow; + Assert(ticks >= 0); + } + + ns += ticks * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION; + return ns; +#else + return ticks; +#endif +} /* * Common macros @@ -168,12 +205,13 @@ GetTimerFrequency(void) #define INSTR_TIME_IS_ZERO(t) ((t).ticks == 0) - #define INSTR_TIME_SET_ZERO(t) ((t).ticks = 0) #define INSTR_TIME_SET_CURRENT_LAZY(t) \ (INSTR_TIME_IS_ZERO(t) ? 
INSTR_TIME_SET_CURRENT(t), true : false) +#define INSTR_TIME_SET_CURRENT(t) \ + ((t) = pg_get_ticks()) #define INSTR_TIME_ADD(x,y) \ ((x).ticks += (y).ticks) @@ -181,12 +219,18 @@ GetTimerFrequency(void) #define INSTR_TIME_SUBTRACT(x,y) \ ((x).ticks -= (y).ticks) +#define INSTR_TIME_DIFF_NANOSEC(x,y) \ + (pg_ticks_to_ns((x).ticks - (y).ticks)) + #define INSTR_TIME_ACCUM_DIFF(x,y,z) \ ((x).ticks += (y).ticks - (z).ticks) #define INSTR_TIME_LT(x,y) \ ((x).ticks > (y).ticks) +#define INSTR_TIME_GET_NANOSEC(t) \ + (pg_ticks_to_ns((t).ticks)) + #define INSTR_TIME_GET_DOUBLE(t) \ ((double) INSTR_TIME_GET_NANOSEC(t) / NS_PER_S) -- 2.47.1