From 8c2003b8c05b04be86a0e679930d092c35951dd1 Mon Sep 17 00:00:00 2001 From: Lukas Fittl Date: Fri, 25 Jul 2025 17:57:20 -0700 Subject: [PATCH v9 5/7] instrumentation: Streamline ticks to nanosecond conversion across platforms The timing infrastructure (INSTR_* macros) measures time elapsed using clock_gettime() on POSIX systems, which returns the time as nanoseconds, and QueryPerformanceCounter() on Windows, which is a specialized timing clock source that returns a tick counter that needs to be converted to nanoseconds using the result of QueryPerformanceFrequency(). This conversion currently happens ad-hoc on Windows, e.g. when calling INSTR_TIME_GET_NANOSEC, which calls QueryPerformanceFrequency() on every invocation, despite the frequency being stable after program start, incurring unnecessary overhead. It also causes a fractured implementation where macros are defined differently between platforms. To ease code readability, and prepare for a future change that intends to use a ticks-to-nanosecond conversion on x86-64 for TSC use, introduce a new pg_ticks_to_ns() function that gets called on all platforms. This function relies on a separately initialized ticks_per_ns_scaled value, that represents the conversion ratio. This value is initialized from QueryPerformanceFrequency() on Windows, and set to zero on x86-64 POSIX systems, which results in the ticks being treated as nanoseconds. Other architectures always directly return the original ticks. To support this, pg_initialize_timing() is introduced, and is now mandatory for both the backend and any frontend programs to call before utilizing INSTR_* macros. 
Author: Lukas Fittl Author: Andres Freund Author: David Geier Reviewed-by: Discussion: https://www.postgresql.org/message-id/flat/20200612232810.f46nbqkdhbutzqdg%40alap3.anarazel.de --- src/backend/postmaster/postmaster.c | 3 + src/bin/pg_test_timing/pg_test_timing.c | 3 + src/bin/pgbench/pgbench.c | 3 + src/bin/psql/startup.c | 4 + src/common/Makefile | 1 + src/common/instr_time.c | 91 ++++++++++++++++++++ src/common/meson.build | 1 + src/include/portability/instr_time.h | 105 +++++++++++++++++------- 8 files changed, 181 insertions(+), 30 deletions(-) create mode 100644 src/common/instr_time.c diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 3fac46c402b..60bd06ed665 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -588,6 +588,9 @@ PostmasterMain(int argc, char *argv[]) */ InitializeGUCOptions(); + /* initialize timing infrastructure (required for INSTR_* calls) */ + pg_initialize_timing(); + opterr = 1; /* diff --git a/src/bin/pg_test_timing/pg_test_timing.c b/src/bin/pg_test_timing/pg_test_timing.c index dd865ed8875..98672ae5d32 100644 --- a/src/bin/pg_test_timing/pg_test_timing.c +++ b/src/bin/pg_test_timing/pg_test_timing.c @@ -43,6 +43,9 @@ main(int argc, char *argv[]) handle_args(argc, argv); + /* initialize timing infrastructure (required for INSTR_* calls) */ + pg_initialize_timing(); + loop_count = test_timing(test_duration); output(loop_count); diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index cb4e986092e..c8b233be16c 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -7334,6 +7334,9 @@ main(int argc, char **argv) initRandomState(&state[i].cs_func_rs); } + /* initialize timing infrastructure (required for INSTR_* calls) */ + pg_initialize_timing(); + /* opening connection... 
*/ con = doConnect(); if (con == NULL) diff --git a/src/bin/psql/startup.c b/src/bin/psql/startup.c index 9a397ec87b7..69d044d405d 100644 --- a/src/bin/psql/startup.c +++ b/src/bin/psql/startup.c @@ -24,6 +24,7 @@ #include "help.h" #include "input.h" #include "mainloop.h" +#include "portability/instr_time.h" #include "settings.h" /* @@ -327,6 +328,9 @@ main(int argc, char *argv[]) PQsetNoticeProcessor(pset.db, NoticeProcessor, NULL); + /* initialize timing infrastructure (required for INSTR_* calls) */ + pg_initialize_timing(); + SyncVariables(); if (options.list_dbs) diff --git a/src/common/Makefile b/src/common/Makefile index 2c720caa509..1a2fbbe887f 100644 --- a/src/common/Makefile +++ b/src/common/Makefile @@ -59,6 +59,7 @@ OBJS_COMMON = \ file_perm.o \ file_utils.o \ hashfn.o \ + instr_time.o \ ip.o \ jsonapi.o \ keywords.o \ diff --git a/src/common/instr_time.c b/src/common/instr_time.c new file mode 100644 index 00000000000..74244d64853 --- /dev/null +++ b/src/common/instr_time.c @@ -0,0 +1,91 @@ +/*------------------------------------------------------------------------- + * + * instr_time.c + * Non-inline parts of the portable high-precision interval timing + * implementation + * + * Portions Copyright (c) 2026, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/common/instr_time.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "portability/instr_time.h" + +/* + * Stores what the number of ticks needs to be multiplied with to end up + * with nanoseconds using integer math. + * + * On certain platforms (currently Windows) the ticks to nanoseconds conversion + * requires floating point math because: + * + * sec = ticks / frequency_hz + * ns = ticks / frequency_hz * 1,000,000,000 + * ns = ticks * (1,000,000,000 / frequency_hz) + * ns = ticks * (1,000,000 / frequency_khz) <-- now in kilohertz + * + * Here, 'ns' is usually a floating-point number. 
For example, for a 2.5 GHz CPU + * the scaling factor becomes 1,000,000 / 2,500,000 = 0.4. + * + * To be able to use integer math we work around the lack of precision. We + * first scale the integer up and after the multiplication by the number + * of ticks in INSTR_TIME_GET_NANOSEC() we divide again by the same value. + * We picked the scaler such that it provides enough precision and is a + * power-of-two which allows for shifting instead of doing an integer + * division. We utilize unsigned integers even though ticks are stored as a + * signed value because that encourages compilers to generate better assembly. + * + * On all other platforms we are using clock_gettime(), which uses nanoseconds + * as ticks. Hence, we set the multiplier to zero, which causes pg_ticks_to_ns + * to return the original value. + */ +uint64 ticks_per_ns_scaled = 0; +uint64 max_ticks_no_overflow = 0; + +static void set_ticks_per_ns(void); + +static bool timing_initialized = false; + +void +pg_initialize_timing(void) +{ + if (timing_initialized) + return; + + set_ticks_per_ns(); + timing_initialized = true; +} + +#ifndef WIN32 + +static void +set_ticks_per_ns() +{ + ticks_per_ns_scaled = 0; + max_ticks_no_overflow = 0; +} + +#else /* WIN32 */ + +/* GetTimerFrequency returns counts per second */ +static inline double +GetTimerFrequency(void) +{ + LARGE_INTEGER f; + + QueryPerformanceFrequency(&f); + return (double) f.QuadPart; +} + +static void +set_ticks_per_ns() +{ + ticks_per_ns_scaled = NS_PER_S * TICKS_TO_NS_PRECISION / GetTimerFrequency(); + max_ticks_no_overflow = PG_INT64_MAX / ticks_per_ns_scaled; +} + +#endif /* WIN32 */ diff --git a/src/common/meson.build b/src/common/meson.build index 4f9b8b8263d..9bd55cda95b 100644 --- a/src/common/meson.build +++ b/src/common/meson.build @@ -13,6 +13,7 @@ common_sources = files( 'file_perm.c', 'file_utils.c', 'hashfn.c', + 'instr_time.c', 'ip.c', 'jsonapi.c', 'keywords.c', diff --git a/src/include/portability/instr_time.h 
b/src/include/portability/instr_time.h index 1b0c8e28f9b..6539ea3d6f2 100644 --- a/src/include/portability/instr_time.h +++ b/src/include/portability/instr_time.h @@ -79,11 +79,29 @@ typedef struct instr_time #define NS_PER_MS INT64CONST(1000000) #define NS_PER_US INT64CONST(1000) +/* + * Make sure this is a power-of-two, so that the compiler can turn the + * multiplications and divisions into shifts. + */ +#define TICKS_TO_NS_PRECISION (1<<14) -#ifndef WIN32 +/* + * Variables used to translate ticks to nanoseconds, initialized by + * pg_initialize_timing. + */ +extern PGDLLIMPORT uint64 ticks_per_ns_scaled; +extern PGDLLIMPORT uint64 max_ticks_no_overflow; + +/* + * Initialize timing infrastructure + * + * This must be called at least once before using INSTR_TIME_SET_CURRENT* macros. + */ +extern void pg_initialize_timing(void); +#ifndef WIN32 -/* Use clock_gettime() */ +/* On POSIX, use clock_gettime() for system clock source */ #include <time.h> @@ -107,9 +125,8 @@ typedef struct instr_time #define PG_INSTR_CLOCK CLOCK_REALTIME #endif -/* helper for INSTR_TIME_SET_CURRENT */ static inline instr_time -pg_clock_gettime_ns(void) +pg_get_ticks(void) { instr_time now; struct timespec tmp; @@ -120,21 +137,12 @@ pg_clock_gettime_ns(void) return now; } -#define INSTR_TIME_SET_CURRENT(t) \ - ((t) = pg_clock_gettime_ns()) - -#define INSTR_TIME_GET_NANOSEC(t) \ - ((int64) (t).ticks) - - #else /* WIN32 */ +/* On Windows, use QueryPerformanceCounter() for system clock source */ -/* Use QueryPerformanceCounter() */ - -/* helper for INSTR_TIME_SET_CURRENT */ static inline instr_time -pg_query_performance_counter(void) +pg_get_ticks(void) { instr_time now; LARGE_INTEGER tmp; @@ -145,23 +153,54 @@ pg_query_performance_counter(void) return now; } -static inline double -GetTimerFrequency(void) -{ - LARGE_INTEGER f; - - QueryPerformanceFrequency(&f); - return (double) f.QuadPart; -} - -#define INSTR_TIME_SET_CURRENT(t) \ - ((t) = pg_query_performance_counter()) - -#define 
INSTR_TIME_GET_NANOSEC(t) \ - ((int64) ((t).ticks * ((double) NS_PER_S / GetTimerFrequency()))) - #endif /* WIN32 */ +static inline int64 +pg_ticks_to_ns(int64 ticks) +{ +#if defined(__x86_64__) || defined(_M_X64) + int64 ns = 0; + + /* + * Avoid doing work if we don't use scaled ticks, e.g. system clock on + * Unix + */ + if (ticks_per_ns_scaled == 0) + return ticks; + + /* + * Would multiplication overflow? If so perform computation in two parts. + * Check overflow without actually overflowing via: a * b > max <=> a > + * max / b + */ + if (unlikely(ticks > (int64) max_ticks_no_overflow)) + { + /* + * Compute how often the maximum number of ticks fits completely into + * the number of elapsed ticks and convert that number into + * nanoseconds. Then multiply by the count to arrive at the final + * value. In a 2nd step we adjust the number of elapsed ticks and + * convert the remaining ticks. + */ + int64 count = ticks / max_ticks_no_overflow; + int64 max_ns = max_ticks_no_overflow * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION; + + ns = max_ns * count; + + /* + * Subtract the ticks that we now already accounted for, so that they + * don't get counted twice. + */ + ticks -= count * max_ticks_no_overflow; + Assert(ticks >= 0); + } + + ns += ticks * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION; + return ns; +#else + return ticks; +#endif +} /* * Common macros @@ -173,6 +212,9 @@ GetTimerFrequency(void) #define INSTR_TIME_SET_NANOSEC(t, n) ((t).ticks = n) +#define INSTR_TIME_SET_CURRENT(t) \ + ((t) = pg_get_ticks()) + #define INSTR_TIME_ADD(x,y) \ ((x).ticks += (y).ticks) @@ -185,6 +227,9 @@ GetTimerFrequency(void) #define INSTR_TIME_GT(x,y) \ ((x).ticks > (y).ticks) +#define INSTR_TIME_GET_NANOSEC(t) \ + (pg_ticks_to_ns((t).ticks)) + #define INSTR_TIME_GET_DOUBLE(t) \ ((double) INSTR_TIME_GET_NANOSEC(t) / NS_PER_S) -- 2.47.1