From 8d320cb8d0a6e35280134fa927c0297e407bf444 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Mon, 16 Jan 2023 11:19:11 -0800 Subject: [PATCH v7 4/4] wip: report nanoseconds in pg_test_timing This commit also updates pg_test_timing's documentation: - compare EXPLAIN (ANALYZE, TIMING ON/OFF) instead of comparing performance of of statement with/without EXPLAIN ANALYZE - explain the 2x overhead (due to two timestamp acquisitions per row) - remove old section about old versions of linux - I couldn't update the numbers, and it's old enough nobody would care --- src/bin/pg_test_timing/pg_test_timing.c | 72 ++++++++------ doc/src/sgml/ref/pgtesttiming.sgml | 122 +++++++++++------------- 2 files changed, 99 insertions(+), 95 deletions(-) diff --git a/src/bin/pg_test_timing/pg_test_timing.c b/src/bin/pg_test_timing/pg_test_timing.c index c29d6f87629..bae10ce6fa4 100644 --- a/src/bin/pg_test_timing/pg_test_timing.c +++ b/src/bin/pg_test_timing/pg_test_timing.c @@ -19,8 +19,8 @@ static void handle_args(int argc, char *argv[]); static uint64 test_timing(unsigned int duration); static void output(uint64 loop_count); -/* record duration in powers of 2 microseconds */ -long long int histogram[32]; +/* record duration in powers of 2 nanoseconds */ +uint64 histogram[64]; int main(int argc, char *argv[]) @@ -122,34 +122,47 @@ static uint64 test_timing(unsigned int duration) { uint64 total_time; - int64 time_elapsed = 0; uint64 loop_count = 0; - uint64 prev, - cur; + instr_time cur; instr_time start_time, - end_time, - temp; - - total_time = duration > 0 ? duration * INT64CONST(1000000) : 0; + end_time; + instr_time until_time; INSTR_TIME_SET_CURRENT(start_time); - cur = INSTR_TIME_GET_MICROSEC(start_time); - while (time_elapsed < total_time) + /* + * To reduce loop overhead, check loop condition in instr_time domain. + */ + INSTR_TIME_SET_SECONDS(total_time, duration); + until_time = start_time; + INSTR_TIME_ADD(until_time, total_time); + + cur = start_time; + + while (cur < until_time) { - int32 diff, - bits = 0; + instr_time temp; + instr_time prev; + int64 diff; + int32 bits = 0; prev = cur; - INSTR_TIME_SET_CURRENT(temp); - cur = INSTR_TIME_GET_MICROSEC(temp); - diff = cur - prev; + INSTR_TIME_SET_CURRENT(cur); + temp = cur; + INSTR_TIME_SUBTRACT(temp, prev); + diff = INSTR_TIME_GET_NANOSEC(temp); /* Did time go backwards? */ - if (diff < 0) + if (unlikely(diff <= 0)) { + /* can't do anything with that measurement */ + if (diff == 0) + { + loop_count++; + continue; + } fprintf(stderr, _("Detected clock going backwards in time.\n")); - fprintf(stderr, _("Time warp: %d ms\n"), diff); + fprintf(stderr, _("Time warp: %lld ns\n"), (long long) diff); exit(1); } @@ -164,8 +177,6 @@ test_timing(unsigned int duration) histogram[bits]++; loop_count++; - INSTR_TIME_SUBTRACT(temp, start_time); - time_elapsed = INSTR_TIME_GET_MICROSEC(temp); } INSTR_TIME_SET_CURRENT(end_time); @@ -173,7 +184,7 @@ test_timing(unsigned int duration) INSTR_TIME_SUBTRACT(end_time, start_time); printf(_("Per loop time including overhead: %0.2f ns\n"), - INSTR_TIME_GET_DOUBLE(end_time) * 1e9 / loop_count); + (INSTR_TIME_GET_DOUBLE(end_time) * NS_PER_S) / loop_count); return loop_count; } @@ -181,9 +192,10 @@ test_timing(unsigned int duration) static void output(uint64 loop_count) { - int64 max_bit = 31, + int64 low_bit = 0, + max_bit = 63, i; - char *header1 = _("< us"); + char *header1 = _("< ns"); char *header2 = /* xgettext:no-c-format */ _("% of total"); char *header3 = _("count"); int len1 = strlen(header1); @@ -194,15 +206,19 @@ output(uint64 loop_count) while (max_bit > 0 && histogram[max_bit] == 0) max_bit--; + /* find lowest bit value */ + while (low_bit < max_bit && histogram[low_bit] == 0) + low_bit++; + printf(_("Histogram of timing durations:\n")); printf("%*s %*s %*s\n", - Max(6, len1), header1, + Max(9, len1), header1, Max(10, len2), header2, Max(10, len3), header3); - for (i = 0; i <= max_bit; i++) - printf("%*ld %*.5f %*lld\n", - Max(6, len1), 1l << i, + for (i = low_bit; i <= max_bit; i++) + printf("%*ld %*.5f %*llu\n", + Max(9, len1), 1l << i, Max(10, len2) - 1, (double) histogram[i] * 100 / loop_count, - Max(10, len3), histogram[i]); + Max(10, len3), (long long unsigned) histogram[i]); } diff --git a/doc/src/sgml/ref/pgtesttiming.sgml b/doc/src/sgml/ref/pgtesttiming.sgml index a5eb3aa25e0..82c625e3a2a 100644 --- a/doc/src/sgml/ref/pgtesttiming.sgml +++ b/doc/src/sgml/ref/pgtesttiming.sgml @@ -93,28 +93,34 @@ PostgreSQL documentation Good results will show most (>90%) individual timing calls take less than - one microsecond. Average per loop overhead will be even lower, below 100 - nanoseconds. This example from an Intel i7-860 system using a TSC clock - source shows excellent performance: + one microsecond (1000 nanoseconds). Average per loop overhead will be even + lower, below 100 nanoseconds. This example from an Intel i9-9880H system + using a TSC clock source shows excellent performance: - Note that different units are used for the per loop time than the - histogram. The loop can have resolution within a few nanoseconds (ns), - while the individual timing calls can only resolve down to one microsecond - (us). + Note that the accuracy of the histogram entries may be lower than the + per loop time. @@ -125,24 +131,30 @@ Histogram of timing durations: When the query executor is running a statement using EXPLAIN ANALYZE, individual operations are timed as well as showing a summary. The overhead of your system can be checked by - counting rows with the psql program: + disabling the per-row timing, using the TIMING OFF + option: -CREATE TABLE t AS SELECT * FROM generate_series(1,100000); -\timing -SELECT COUNT(*) FROM t; -EXPLAIN ANALYZE SELECT COUNT(*) FROM t; +CREATE TABLE t AS SELECT * FROM generate_series(1, 100000); +EXPLAIN (ANALYZE, TIMING OFF) SELECT COUNT(*) FROM t; +EXPLAIN (ANALYZE, TIMING ON) SELECT COUNT(*) FROM t; - The i7-860 system measured runs the count query in 9.8 ms while - the EXPLAIN ANALYZE version takes 16.6 ms, each - processing just over 100,000 rows. That 6.8 ms difference means the timing - overhead per row is 68 ns, about twice what pg_test_timing estimated it - would be. Even that relatively small amount of overhead is making the fully - timed count statement take almost 70% longer. On more substantial queries, - the timing overhead would be less problematic. + The i9-9880H system measured shows an execution time of 4.116 ms for the + TIMING OFF query, and 6.965 ms for the + TIMING ON, each processing 100,000 rows. + + That 2.849 ms difference means the timing overhead per row is 28 ns. As + TIMING ON measures timestamps twice per row returned by + an executor node, the overhead is very close to what pg_test_timing + estimated it would be. + + more than what pg_test_timing estimated it would be. Even that relatively + small amount of overhead is making the fully timed count statement take + about 60% longer. On more substantial queries, the timing overhead would + be less problematic. @@ -157,28 +169,31 @@ EXPLAIN ANALYZE SELECT COUNT(*) FROM t; /sys/devices/system/clocksource/clocksource0/current_clocksource # pg_test_timing -Per loop time including overhead: 722.92 ns +Testing timing overhead for 3 seconds. +Per loop time including overhead: 708.58 ns Histogram of timing durations: - < us % of total count - 1 27.84870 1155682 - 2 72.05956 2990371 - 4 0.07810 3241 - 8 0.01357 563 - 16 0.00007 3 + < ns % of total count + 1024 99.79796 4225270 + 2048 0.15560 6588 + 4096 0.00035 15 + 8192 0.01738 736 + 16384 0.01679 711 + 32768 0.01190 504 ]]> In this configuration, the sample EXPLAIN ANALYZE above - takes 115.9 ms. That's 1061 ns of timing overhead, again a small multiple - of what's measured directly by this utility. That much timing overhead - means the actual query itself is only taking a tiny fraction of the - accounted for time, most of it is being consumed in overhead instead. In - this configuration, any EXPLAIN ANALYZE totals involving - many timed operations would be inflated significantly by timing overhead. + shows an execution time of 148.7 ms. That's 1392 ns of timing overhead, + again a small multiple of what's measured directly by this utility. That + much timing overhead means the actual query itself is only taking a tiny + fraction of the accounted for time, most of it is being consumed in + overhead instead. In this configuration, any EXPLAIN + ANALYZE totals involving many timed operations would be inflated + significantly by timing overhead. @@ -196,33 +211,6 @@ kern.timecounter.hardware: ACPI-fast -> TSC - - Other systems may only allow setting the time source on boot. On older - Linux systems the "clock" kernel setting is the only way to make this sort - of change. And even on some more recent ones, the only option you'll see - for a clock source is "jiffies". Jiffies are the older Linux software clock - implementation, which can have good resolution when it's backed by fast - enough timing hardware, as in this example: - - - -- 2.38.0