From b5efb122f55701e386a2cd808c30b7f7db83c708 Mon Sep 17 00:00:00 2001 From: Lukas Fittl Date: Thu, 12 Feb 2026 01:12:19 -0800 Subject: [PATCH v6 4/4] pg_test_timing: Also test RDTSC/RDTSCP timing and report time source In passing also reduce the per-loop overhead caused by repeated divisions in INSTR_TIME_GET_NANOSEC when the ticks variable has become very large instead diff first and then turn it into nanosecs. Author: David Geier Author: Andres Freund Author: Lukas Fittl Reviewed-by: Discussion: https://www.postgresql.org/message-id/flat/20200612232810.f46nbqkdhbutzqdg%40alap3.anarazel.de --- src/bin/pg_test_timing/pg_test_timing.c | 96 ++++++++++++++++++++----- src/include/portability/instr_time.h | 9 +++ 2 files changed, 88 insertions(+), 17 deletions(-) diff --git a/src/bin/pg_test_timing/pg_test_timing.c b/src/bin/pg_test_timing/pg_test_timing.c index fee2911df15..6e630892b8d 100644 --- a/src/bin/pg_test_timing/pg_test_timing.c +++ b/src/bin/pg_test_timing/pg_test_timing.c @@ -30,7 +30,7 @@ static long long int largest_diff_count; static void handle_args(int argc, char *argv[]); -static uint64 test_timing(unsigned int duration); +static uint64 test_timing(unsigned int duration, TimingClockSourceType source, bool fast_timing); static void output(uint64 loop_count); int @@ -46,10 +46,47 @@ main(int argc, char *argv[]) /* initialize timing infrastructure (required for INSTR_* calls) */ pg_initialize_timing(); - loop_count = test_timing(test_duration); - + /* + * First, test default (non-fast) timing code. A clock source for that is + * always available. Hence, we can unconditionally output the result. + */ + loop_count = test_timing(test_duration, TIMING_CLOCK_SOURCE_SYSTEM, false); output(loop_count); +#if defined(__x86_64__) + + /* + * If on a supported architecture, test the RDTSC clock source. This clock + * source is not always available. In that case the loop count will be 0 + * and we don't print. + * + * We first emit RDTSCP timings, which is slower, and gets used for higher + * precision measurements when the TSC clock source is enabled. We emit + * RDTSC second, which is used for faster timing measurements with lower + * precision. + */ + printf("\n"); + loop_count = test_timing(test_duration, TIMING_CLOCK_SOURCE_TSC, false); + if (loop_count > 0) + { + output(loop_count); + printf("\n"); + + /* Now, emit fast timing measurements */ + loop_count = test_timing(test_duration, TIMING_CLOCK_SOURCE_TSC, true); + output(loop_count); + printf("\n"); + + pg_set_timing_clock_source(TIMING_CLOCK_SOURCE_AUTO); + if (pg_current_timing_clock_source() == TIMING_CLOCK_SOURCE_TSC) + printf(_("TSC clock source will be used by default, unless timing_clock_source is set to 'system'.\n")); + else + printf(_("TSC clock source will not be used by default, unless timing_clock_source is set to 'tsc'.\n")); + } + else + printf(_("TSC clock source is not usable. Likely unable to determine TSC frequency. are you running in an unsupported virtualized environment?.\n")); +#endif + return 0; } @@ -146,23 +183,41 @@ handle_args(int argc, char *argv[]) exit(1); } - printf(ngettext("Testing timing overhead for %u second.\n", - "Testing timing overhead for %u seconds.\n", + printf(ngettext("Testing timing overhead for %u second.\n\n", + "Testing timing overhead for %u seconds.\n\n", test_duration), test_duration); } static uint64 -test_timing(unsigned int duration) +test_timing(unsigned int duration, TimingClockSourceType source, bool fast_timing) { uint64 total_time; int64 time_elapsed = 0; uint64 loop_count = 0; - uint64 prev, - cur; instr_time start_time, end_time, - temp; + prev, + cur; + char *time_source = NULL; + + if (!pg_set_timing_clock_source(source)) + return 0; + +#if defined(__x86_64__) + if (pg_current_timing_clock_source() == TIMING_CLOCK_SOURCE_TSC) + time_source = fast_timing ? "RDTSC" : "RDTSCP"; + else + time_source = PG_INSTR_SYSTEM_CLOCK_NAME; +#else + time_source = PG_INSTR_SYSTEM_CLOCK_NAME; +#endif + if (fast_timing) + printf(_("Fast clock source: %s\n"), time_source); + else if (source == TIMING_CLOCK_SOURCE_SYSTEM) + printf(_("System clock source: %s\n"), time_source); + else + printf(_("Clock source: %s\n"), time_source); /* * Pre-zero the statistics data structures. They're already zero by @@ -176,8 +231,11 @@ test_timing(unsigned int duration) total_time = duration > 0 ? duration * INT64CONST(1000000000) : 0; - INSTR_TIME_SET_CURRENT(start_time); - cur = INSTR_TIME_GET_NANOSEC(start_time); + if (fast_timing) + INSTR_TIME_SET_CURRENT_FAST(start_time); + else + INSTR_TIME_SET_CURRENT(start_time); + cur = start_time; while (time_elapsed < total_time) { @@ -185,9 +243,11 @@ test_timing(unsigned int duration) bits; prev = cur; - INSTR_TIME_SET_CURRENT(temp); - cur = INSTR_TIME_GET_NANOSEC(temp); - diff = cur - prev; + if (fast_timing) + INSTR_TIME_SET_CURRENT_FAST(cur); + else + INSTR_TIME_SET_CURRENT(cur); + diff = INSTR_TIME_DIFF_NANOSEC(cur, prev); /* Did time go backwards? */ if (unlikely(diff < 0)) @@ -220,11 +280,13 @@ test_timing(unsigned int duration) largest_diff_count++; loop_count++; - INSTR_TIME_SUBTRACT(temp, start_time); - time_elapsed = INSTR_TIME_GET_NANOSEC(temp); + time_elapsed = INSTR_TIME_DIFF_NANOSEC(cur, start_time); } - INSTR_TIME_SET_CURRENT(end_time); + if (fast_timing) + INSTR_TIME_SET_CURRENT_FAST(end_time); + else + INSTR_TIME_SET_CURRENT(end_time); INSTR_TIME_SUBTRACT(end_time, start_time); diff --git a/src/include/portability/instr_time.h b/src/include/portability/instr_time.h index e7cc251b3d2..41a2225fe6e 100644 --- a/src/include/portability/instr_time.h +++ b/src/include/portability/instr_time.h @@ -28,6 +28,8 @@ * * INSTR_TIME_SUBTRACT(x, y) x -= y * + * INSTR_TIME_DIFF_NANOSEC(x, y) x - y (in nanoseconds) + * * INSTR_TIME_ACCUM_DIFF(x, y, z) x += (y - z) * * INSTR_TIME_GET_DOUBLE(t) convert t to double (in seconds) @@ -164,10 +166,13 @@ extern bool pg_set_timing_clock_source(TimingClockSourceType source); */ #if defined(__darwin__) && defined(CLOCK_MONOTONIC_RAW) #define PG_INSTR_SYSTEM_CLOCK CLOCK_MONOTONIC_RAW +#define PG_INSTR_SYSTEM_CLOCK_NAME "clock_gettime (CLOCK_MONOTONIC_RAW)" #elif defined(CLOCK_MONOTONIC) #define PG_INSTR_SYSTEM_CLOCK CLOCK_MONOTONIC +#define PG_INSTR_SYSTEM_CLOCK_NAME "clock_gettime (CLOCK_MONOTONIC)" #else #define PG_INSTR_SYSTEM_CLOCK CLOCK_REALTIME +#define PG_INSTR_SYSTEM_CLOCK_NAME "clock_gettime (CLOCK_REALTIME)" #endif static inline instr_time @@ -186,6 +191,7 @@ pg_get_ticks_system(void) /* On Windows, use QueryPerformanceCounter() for system clock source */ +#define PG_INSTR_SYSTEM_CLOCK_NAME "QueryPerformanceCounter" static inline instr_time pg_get_ticks_system(void) { @@ -289,6 +295,9 @@ pg_get_ticks(void) #define INSTR_TIME_SUBTRACT(x,y) \ ((x).ticks -= (y).ticks) +#define INSTR_TIME_DIFF_NANOSEC(x,y) \ + (pg_ticks_to_ns((x).ticks - (y).ticks)) + #define INSTR_TIME_ACCUM_DIFF(x,y,z) \ ((x).ticks += (y).ticks - (z).ticks) -- 2.47.1