From a7d10344d24425944e935124f9a41887bf70a557 Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Wed, 18 Aug 2021 14:55:17 +0900 Subject: [PATCH v3 1/2] Track I/O timing for temp buffers Previously, the output of EXPLAIN with BUFFERS option shows only I/O timing spent reading and writing shared and local buffers. This commit shows I/O timing for temp buffers in the output of EXPLAIN. It is helpful for users in a case where it is the temp buffers I/O that is using most of the time. Same as shared/local buffer I/O timings, temp buffers I/O timing is shown only when track_io_timing is enabled. --- doc/src/sgml/ref/explain.sgml | 8 +++--- src/backend/commands/explain.c | 42 ++++++++++++++++++++++++------ src/backend/executor/instrument.c | 6 +++++ src/backend/storage/file/buffile.c | 29 ++++++++++++++++++++- src/include/executor/instrument.h | 6 +++-- 5 files changed, 76 insertions(+), 15 deletions(-) diff --git a/doc/src/sgml/ref/explain.sgml b/doc/src/sgml/ref/explain.sgml index 4d758fb237..70c65c4ba1 100644 --- a/doc/src/sgml/ref/explain.sgml +++ b/doc/src/sgml/ref/explain.sgml @@ -174,10 +174,10 @@ ROLLBACK; Include information on buffer usage. Specifically, include the number of shared blocks hit, read, dirtied, and written, the number of local blocks hit, read, dirtied, and written, the number of temp blocks read and - written, and the time spent reading and writing data file blocks - (in milliseconds) if is enabled. - A hit means that a read was avoided because the block was - found already in cache when needed. + written, and the time spent reading and writing data file blocks and + temp file blocks (in milliseconds) if + is enabled. A hit means that a read was avoided because + the block was found already in cache when needed. Shared blocks contain data from regular tables and indexes; local blocks contain data from temporary tables and indexes; while temp blocks contain short-term working data used in sorts, hashes, diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index de81379da3..f4aaf6d1f3 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -3510,8 +3510,11 @@ show_buffer_usage(ExplainState *es, const BufferUsage *usage, bool planning) usage->temp_blks_written > 0); bool has_timing = (!INSTR_TIME_IS_ZERO(usage->blk_read_time) || !INSTR_TIME_IS_ZERO(usage->blk_write_time)); + bool has_temp_timing = (!INSTR_TIME_IS_ZERO(usage->temp_blk_read_time) || + !INSTR_TIME_IS_ZERO(usage->temp_blk_write_time)); bool show_planning = (planning && (has_shared || - has_local || has_temp || has_timing)); + has_local || has_temp || has_timing || + has_temp_timing)); if (show_planning) { @@ -3576,16 +3579,33 @@ show_buffer_usage(ExplainState *es, const BufferUsage *usage, bool planning) } /* As above, show only positive counter values. */ - if (has_timing) + if (has_timing || has_temp_timing) { ExplainIndentText(es); appendStringInfoString(es->str, "I/O Timings:"); - if (!INSTR_TIME_IS_ZERO(usage->blk_read_time)) - appendStringInfo(es->str, " read=%0.3f", - INSTR_TIME_GET_MILLISEC(usage->blk_read_time)); - if (!INSTR_TIME_IS_ZERO(usage->blk_write_time)) - appendStringInfo(es->str, " write=%0.3f", - INSTR_TIME_GET_MILLISEC(usage->blk_write_time)); + + if (has_timing) + { + appendStringInfoString(es->str, " shared/local"); + if (!INSTR_TIME_IS_ZERO(usage->blk_read_time)) + appendStringInfo(es->str, " read=%0.3f", + INSTR_TIME_GET_MILLISEC(usage->blk_read_time)); + if (!INSTR_TIME_IS_ZERO(usage->blk_write_time)) + appendStringInfo(es->str, " write=%0.3f", + INSTR_TIME_GET_MILLISEC(usage->blk_write_time)); + if (has_temp_timing) + appendStringInfoChar(es->str, ','); + } + if (has_temp_timing) + { + appendStringInfoString(es->str, " temp"); + if (!INSTR_TIME_IS_ZERO(usage->temp_blk_read_time)) + appendStringInfo(es->str, " read=%0.3f", + INSTR_TIME_GET_MILLISEC(usage->temp_blk_read_time)); + if (!INSTR_TIME_IS_ZERO(usage->temp_blk_write_time)) + appendStringInfo(es->str, " write=%0.3f", + INSTR_TIME_GET_MILLISEC(usage->temp_blk_write_time)); + } appendStringInfoChar(es->str, '\n'); } @@ -3622,6 +3642,12 @@ show_buffer_usage(ExplainState *es, const BufferUsage *usage, bool planning) ExplainPropertyFloat("I/O Write Time", "ms", INSTR_TIME_GET_MILLISEC(usage->blk_write_time), 3, es); + ExplainPropertyFloat("Temp I/O Read Time", "ms", + INSTR_TIME_GET_MILLISEC(usage->temp_blk_read_time), + 3, es); + ExplainPropertyFloat("Temp I/O Write Time", "ms", + INSTR_TIME_GET_MILLISEC(usage->temp_blk_write_time), + 3, es); } } } diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c index c5ff02a842..ceff4727d4 100644 --- a/src/backend/executor/instrument.c +++ b/src/backend/executor/instrument.c @@ -237,6 +237,8 @@ BufferUsageAdd(BufferUsage *dst, const BufferUsage *add) dst->temp_blks_written += add->temp_blks_written; INSTR_TIME_ADD(dst->blk_read_time, add->blk_read_time); INSTR_TIME_ADD(dst->blk_write_time, add->blk_write_time); + INSTR_TIME_ADD(dst->temp_blk_read_time, add->temp_blk_read_time); + INSTR_TIME_ADD(dst->temp_blk_write_time, add->temp_blk_write_time); } /* dst += add - sub */ @@ -259,6 +261,10 @@ BufferUsageAccumDiff(BufferUsage *dst, add->blk_read_time, sub->blk_read_time); INSTR_TIME_ACCUM_DIFF(dst->blk_write_time, add->blk_write_time, sub->blk_write_time); + INSTR_TIME_ACCUM_DIFF(dst->temp_blk_read_time, + add->temp_blk_read_time, sub->temp_blk_read_time); + INSTR_TIME_ACCUM_DIFF(dst->temp_blk_write_time, + add->temp_blk_write_time, sub->temp_blk_write_time); } /* helper functions for WAL usage accumulation */ diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c index 3108e96dc9..56b88594cc 100644 --- a/src/backend/storage/file/buffile.c +++ b/src/backend/storage/file/buffile.c @@ -429,6 +429,8 @@ static void BufFileLoadBuffer(BufFile *file) { File thisfile; + instr_time io_start; + instr_time io_time; /* * Advance to next component file if necessary and possible. @@ -440,10 +442,14 @@ BufFileLoadBuffer(BufFile *file) file->curOffset = 0L; } + thisfile = file->files[file->curFile]; + + if (track_io_timing) + INSTR_TIME_SET_CURRENT(io_start); + /* * Read whatever we can get, up to a full bufferload. */ - thisfile = file->files[file->curFile]; file->nbytes = FileRead(thisfile, file->buffer.data, sizeof(file->buffer), @@ -458,6 +464,13 @@ BufFileLoadBuffer(BufFile *file) FilePathName(thisfile)))); } + if (track_io_timing) + { + INSTR_TIME_SET_CURRENT(io_time); + INSTR_TIME_SUBTRACT(io_time, io_start); + INSTR_TIME_ADD(pgBufferUsage.temp_blk_read_time, io_time); + } + /* we choose not to advance curOffset here */ if (file->nbytes > 0) @@ -485,6 +498,8 @@ BufFileDumpBuffer(BufFile *file) while (wpos < file->nbytes) { off_t availbytes; + instr_time io_start; + instr_time io_time; /* * Advance to next component file if necessary and possible. @@ -507,6 +522,10 @@ BufFileDumpBuffer(BufFile *file) bytestowrite = (int) availbytes; thisfile = file->files[file->curFile]; + + if (track_io_timing) + INSTR_TIME_SET_CURRENT(io_start); + bytestowrite = FileWrite(thisfile, file->buffer.data + wpos, bytestowrite, @@ -517,6 +536,14 @@ BufFileDumpBuffer(BufFile *file) (errcode_for_file_access(), errmsg("could not write to file \"%s\": %m", FilePathName(thisfile)))); + + if (track_io_timing) + { + INSTR_TIME_SET_CURRENT(io_time); + INSTR_TIME_SUBTRACT(io_time, io_start); + INSTR_TIME_ADD(pgBufferUsage.temp_blk_write_time, io_time); + } + file->curOffset += bytestowrite; wpos += bytestowrite; diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h index 1b7157bdd1..6c080beb81 100644 --- a/src/include/executor/instrument.h +++ b/src/include/executor/instrument.h @@ -33,8 +33,10 @@ typedef struct BufferUsage int64 local_blks_written; /* # of local disk blocks written */ int64 temp_blks_read; /* # of temp blocks read */ int64 temp_blks_written; /* # of temp blocks written */ - instr_time blk_read_time; /* time spent reading */ - instr_time blk_write_time; /* time spent writing */ + instr_time blk_read_time; /* time spent reading blocks */ + instr_time blk_write_time; /* time spent writing blocks */ + instr_time temp_blk_read_time; /* time spent reading temp blocks */ + instr_time temp_blk_write_time; /* time spent writing temp blocks */ } BufferUsage; /* -- 2.24.3 (Apple Git-128)