From 29a12302bab6b2fb8c2475834510b90c4a6197ce Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Thu, 11 Jun 2020 19:38:18 -0700 Subject: [PATCH v1 2/2] WIP: Use cpu reference cycles, via rdtsc, to measure time for instrumentation. --- src/include/portability/instr_time.h | 68 ++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 8 deletions(-) diff --git a/src/include/portability/instr_time.h b/src/include/portability/instr_time.h index fc058d548a8..8b2f9a2e707 100644 --- a/src/include/portability/instr_time.h +++ b/src/include/portability/instr_time.h @@ -83,7 +83,9 @@ #define PG_INSTR_CLOCK CLOCK_REALTIME #endif +/* time in baseline cpu cycles */ typedef int64 instr_time; + #define NS_PER_S INT64CONST(1000000000) #define US_PER_S INT64CONST(1000000) #define MS_PER_S INT64CONST(1000) @@ -95,17 +97,67 @@ typedef int64 instr_time; #define INSTR_TIME_SET_ZERO(t) ((t) = 0) -static inline instr_time pg_clock_gettime_ns(void) +#include +#include + +/* + * Return what the number of cycles needs to be multiplied with to end up with + * seconds. + * + * FIXME: The cold portion should probably be out-of-line. And it'd be better + * to not recompute this in every file that uses this. Best would probably be + * to require explicit initialization of cycles_to_sec, because having a + * branch really is unnecessary. + * + * FIXME: We should probably not unnecessarily use floating point math + * here. And it's likely that the numbers are small enough that we are running + * into floating point inaccuracies already. Probably worthwhile to be a good + * bit smarter. + * + * FIXME: This would need to be conditional, with a fallback to something not + * rdtsc based. 
+ */ +static inline double __attribute__((const)) +get_cycles_to_sec(void) { - struct timespec tmp; + static double cycles_to_sec = 0; - clock_gettime(PG_INSTR_CLOCK, &tmp); + /* + * Compute baseline cpu performance, determines speed at which rdtsc advances + */ + if (unlikely(cycles_to_sec == 0)) + { + uint32 cpuinfo[4] = {0}; - return tmp.tv_sec * NS_PER_S + tmp.tv_nsec; + __get_cpuid(0x16, cpuinfo, cpuinfo + 1, cpuinfo + 2, cpuinfo + 3); + cycles_to_sec = 1 / ((double) cpuinfo[0] * 1000 * 1000); + } + + return cycles_to_sec; +} + +static inline instr_time pg_clock_gettime_ref_cycles(void) +{ + /* + * The rdtscp waits for all in-flight instructions to finish (but allows + * later instructions to start concurrently). That's good for some timing + * situations (when the time is supposed to cover all the work), but + * terrible for others (when sub-parts of work are measured, because then + * the pipeline stall due to the wait changes the overall timing). + */ +#if 0 + unsigned int aux; + int64 tsc = __rdtscp(&aux); + + return tsc; +#else + + return __rdtsc(); +#endif } #define INSTR_TIME_SET_CURRENT(t) \ - (t) = pg_clock_gettime_ns() + (t) = pg_clock_gettime_ref_cycles() #define INSTR_TIME_ADD(x,y) \ do { \ @@ -123,13 +175,13 @@ static inline instr_time pg_clock_gettime_ns(void) } while (0) #define INSTR_TIME_GET_DOUBLE(t) \ - ((double) (t) / NS_PER_S) + ((double) (t) * get_cycles_to_sec()) #define INSTR_TIME_GET_MILLISEC(t) \ - ((double) (t) / NS_PER_MS) + ((double) (t) * (get_cycles_to_sec() * MS_PER_S)) #define INSTR_TIME_GET_MICROSEC(t) \ - ((double) (t) / NS_PER_US) + ((double) (t) * (get_cycles_to_sec() * US_PER_S)) #else /* !HAVE_CLOCK_GETTIME */ -- 2.25.0.114.g5b0ca878e0