From c5f813b209023d2ad6247a17969f4410e7511a40 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Wed, 28 Dec 2022 12:09:15 -0800 Subject: [PATCH v42 4/4] Add system view tracking IO ops per backend type Add pg_stat_io, a system view which tracks the number of IOOps (evictions, reuses, reads, writes, extensions, and fsyncs) done on each IOObject (relation, temp relation) in each IOContext ("normal" and those using a BufferAccessStrategy) by each type of backend (e.g. client backend, checkpointer). Some BackendTypes do not accumulate IO operations statistics and will not be included in the view. Some IOContexts are not used by some BackendTypes and will not be in the view. For example, checkpointer does not use a BufferAccessStrategy (currently), so there will be no rows for BufferAccessStrategy IOContexts for checkpointer. Some IOObjects are never operated on in some IOContexts or by some BackendTypes. These rows are omitted from the view. For example, checkpointer will never operate on IOOBJECT_TEMP_RELATION data, so those rows are omitted. Some IOOps are invalid in combination with certain IOContexts and certain IOObjects. Those cells will be NULL in the view to distinguish between 0 observed IOOps of that type and an invalid combination. For example, temporary tables are not fsynced so cells for all BackendTypes for IOOBJECT_TEMP_RELATION and IOOP_FSYNC will be NULL. Some BackendTypes never perform certain IOOps. Those cells will also be NULL in the view. For example, bgwriter should not perform reads. View stats are populated with statistics incremented when a backend performs an IO Operation and maintained by the cumulative statistics subsystem. Each row of the view shows stats for a particular BackendType, IOObject, IOContext combination (e.g. a client backend's operations on permanent relations in shared buffers) and each column in the view is the total number of IO Operations done (e.g. writes). 
So a cell in the view would be, for example, the number of blocks of relation data written from shared buffers by client backends since the last stats reset. In anticipation of tracking WAL IO and non-block-oriented IO (such as temporary file IO), the "op_bytes" column specifies the unit of the "read", "written", and "extended" columns for a given row. Note that some of the cells in the view are redundant with fields in pg_stat_bgwriter (e.g. buffers_backend), however these have been kept in pg_stat_bgwriter for backwards compatibility. Deriving the redundant pg_stat_bgwriter stats from the IO operations stats structures was also problematic due to the separate reset targets for 'bgwriter' and 'io'. Suggested by Andres Freund Author: Melanie Plageman Reviewed-by: Andres Freund Reviewed-by: Justin Pryzby Reviewed-by: Kyotaro Horiguchi Reviewed-by: Maciek Sakrejda Reviewed-by: Lukas Fittl Discussion: https://www.postgresql.org/message-id/flat/20200124195226.lth52iydq2n2uilq%40alap3.anarazel.de --- contrib/amcheck/expected/check_heap.out | 31 ++ contrib/amcheck/sql/check_heap.sql | 24 ++ doc/src/sgml/monitoring.sgml | 418 +++++++++++++++++++++++- src/backend/catalog/system_views.sql | 15 + src/backend/utils/adt/pgstatfuncs.c | 142 ++++++++ src/include/catalog/pg_proc.dat | 9 + src/test/regress/expected/rules.out | 12 + src/test/regress/expected/stats.out | 225 +++++++++++++ src/test/regress/sql/stats.sql | 138 ++++++++ src/tools/pgindent/typedefs.list | 1 + 10 files changed, 1001 insertions(+), 14 deletions(-) diff --git a/contrib/amcheck/expected/check_heap.out b/contrib/amcheck/expected/check_heap.out index c010361025..c44338fd6e 100644 --- a/contrib/amcheck/expected/check_heap.out +++ b/contrib/amcheck/expected/check_heap.out @@ -66,6 +66,19 @@ SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'ALL-VISIBLE'); INSERT INTO heaptest (a, b) (SELECT gs, repeat('x', gs) FROM generate_series(1,50) gs); +-- pg_stat_io test: +-- verify_heapam always uses a 
BAS_BULKREAD BufferAccessStrategy. This allows +-- us to reliably test that pg_stat_io BULKREAD reads are being captured +-- without relying on the size of shared buffers or on an expensive operation +-- like CREATE DATABASE. +-- +-- Create an alternative tablespace and move the heaptest table to it, causing +-- it to be rewritten. +SET allow_in_place_tablespaces = true; +CREATE TABLESPACE test_stats LOCATION ''; +SELECT sum(read) AS stats_bulkreads_before + FROM pg_stat_io WHERE io_context = 'bulkread' \gset +ALTER TABLE heaptest SET TABLESPACE test_stats; -- Check that valid options are not rejected nor corruption reported -- for a non-empty table SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none'); @@ -88,6 +101,23 @@ SELECT * FROM verify_heapam(relation := 'heaptest', startblock := 0, endblock := -------+--------+--------+----- (0 rows) +-- verify_heapam should have read in the page written out by +-- ALTER TABLE ... SET TABLESPACE ... +-- causing an additional bulkread, which should be reflected in pg_stat_io. +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT sum(read) AS stats_bulkreads_after + FROM pg_stat_io WHERE io_context = 'bulkread' \gset +SELECT :stats_bulkreads_after > :stats_bulkreads_before; + ?column? +---------- + t +(1 row) + CREATE ROLE regress_heaptest_role; -- verify permissions are checked (error due to function not callable) SET ROLE regress_heaptest_role; @@ -195,6 +225,7 @@ ERROR: cannot check relation "test_foreign_table" DETAIL: This operation is not supported for foreign tables. 
-- cleanup DROP TABLE heaptest; +DROP TABLESPACE test_stats; DROP TABLE test_partition; DROP TABLE test_partitioned; DROP OWNED BY regress_heaptest_role; -- permissions diff --git a/contrib/amcheck/sql/check_heap.sql b/contrib/amcheck/sql/check_heap.sql index 298de6886a..210f9b22e2 100644 --- a/contrib/amcheck/sql/check_heap.sql +++ b/contrib/amcheck/sql/check_heap.sql @@ -20,11 +20,26 @@ SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'NONE'); SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'ALL-FROZEN'); SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'ALL-VISIBLE'); + -- Add some data so subsequent tests are not entirely trivial INSERT INTO heaptest (a, b) (SELECT gs, repeat('x', gs) FROM generate_series(1,50) gs); +-- pg_stat_io test: +-- verify_heapam always uses a BAS_BULKREAD BufferAccessStrategy. This allows +-- us to reliably test that pg_stat_io BULKREAD reads are being captured +-- without relying on the size of shared buffers or on an expensive operation +-- like CREATE DATABASE. +-- +-- Create an alternative tablespace and move the heaptest table to it, causing +-- it to be rewritten. +SET allow_in_place_tablespaces = true; +CREATE TABLESPACE test_stats LOCATION ''; +SELECT sum(read) AS stats_bulkreads_before + FROM pg_stat_io WHERE io_context = 'bulkread' \gset +ALTER TABLE heaptest SET TABLESPACE test_stats; + -- Check that valid options are not rejected nor corruption reported -- for a non-empty table SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none'); @@ -32,6 +47,14 @@ SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'all-frozen'); SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'all-visible'); SELECT * FROM verify_heapam(relation := 'heaptest', startblock := 0, endblock := 0); +-- verify_heapam should have read in the page written out by +-- ALTER TABLE ... SET TABLESPACE ... +-- causing an additional bulkread, which should be reflected in pg_stat_io. 
+SELECT pg_stat_force_next_flush(); +SELECT sum(read) AS stats_bulkreads_after + FROM pg_stat_io WHERE io_context = 'bulkread' \gset +SELECT :stats_bulkreads_after > :stats_bulkreads_before; + CREATE ROLE regress_heaptest_role; -- verify permissions are checked (error due to function not callable) @@ -110,6 +133,7 @@ SELECT * FROM verify_heapam('test_foreign_table', -- cleanup DROP TABLE heaptest; +DROP TABLESPACE test_stats; DROP TABLE test_partition; DROP TABLE test_partitioned; DROP OWNED BY regress_heaptest_role; -- permissions diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 710bd2c52e..b27c6c7bc7 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -469,6 +469,15 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser + + pg_stat_iopg_stat_io + One row for each combination of backend type, IO context, and IO object, + showing statistics about backend IO operations. See + + pg_stat_io for details. + + + pg_stat_replication_slotspg_stat_replication_slots One row per replication slot, showing statistics about the @@ -665,20 +674,20 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser - The pg_statio_ views are primarily useful to - determine the effectiveness of the buffer cache. When the number - of actual disk reads is much smaller than the number of buffer - hits, then the cache is satisfying most read requests without - invoking a kernel call. However, these statistics do not give the - entire story: due to the way in which PostgreSQL - handles disk I/O, data that is not in the - PostgreSQL buffer cache might still reside in the - kernel's I/O cache, and might therefore still be fetched without - requiring a physical read. Users interested in obtaining more - detailed information on PostgreSQL I/O behavior are - advised to use the PostgreSQL statistics views - in combination with operating system utilities that allow insight - into the kernel's handling of I/O. 
+ The pg_stat_io and + pg_statio_ set of views are primarily useful to + determine the effectiveness of the buffer cache. When the number of actual + disk reads is much smaller than the number of buffer hits, then the cache is + satisfying most read requests without invoking a kernel call. However, these + statistics do not give the entire story: due to the way in which + PostgreSQL handles disk I/O, data that is not in + the PostgreSQL buffer cache might still reside in + the kernel's I/O cache, and might therefore still be fetched without + requiring a physical read. Users interested in obtaining more detailed + information on PostgreSQL I/O behavior are + advised to use the PostgreSQL statistics views in + combination with operating system utilities that allow insight into the + kernel's handling of I/O. @@ -3628,6 +3637,387 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i last_archived_wal have also been successfully archived. + + + + <structname>pg_stat_io</structname> + + + pg_stat_io + + + + The pg_stat_io view shows IO related + statistics. The statistics are tracked separately for each backend type, IO + context, and IO object, with each combination + returned as a separate row (combinations that do not make sense are + omitted). + + + + Currently, IO on relations (e.g. tables, indexes) is tracked. However, + relation IO that bypasses shared buffers (e.g. when moving a table from one + tablespace to another) currently is not tracked. + + + + <structname>pg_stat_io</structname> View + + + + + + Column Type + + + Description + + + + + + + + + backend_type text + + + Type of backend (e.g. background worker, autovacuum worker). + See + pg_stat_activity for more information on + backend_types. Some backend_types + do not accumulate IO operation statistics and will not be included in + the view. 
+ + + + + + + + io_context text + + + The context of an IO operation or location of an IO object: + + + + + normal refers to the default or standard type or + location of IO operations on IO objects. + + + Operations on temporary relations use a process-local buffer pool and + are counted as io_context + normal , io_object + temp relation operations. + + + IO operations on permanent relations are done by default in shared + buffers. These are tracked in io_context + normal, io_object + relation. + + + + + vacuum refers to the IO operations incurred while + vacuuming and analyzing permanent relations. + + + + + bulkread refers to IO operations on permanent + relations specially designated as bulkreads, such + as the sequential scan of a large table. + + + + + bulkwrite refers to IO operations on permanent + relations specially designated as bulkwrites, + such as COPY. + + + + + These last three io_contexts are counted separately + because the autovacuum daemon, explicit VACUUM, + explicit ANALYZE, many bulk reads, and many bulk + writes acquire a limited number of shared buffers and reuse them + circularly to avoid occupying an undue portion of the main shared + buffer pool. This pattern is called a Buffer Access + Strategy in the PostgreSQL source + code and the fixed-size ring buffer is referred to as a strategy + ring buffer for the purposes of this view's documentation. + These io_contexts are referred to as strategy + contexts and IO operations on strategy contexts are referred to + as strategy operations. + + + + + + + + io_object text + + + Object operated on in a given io_context by a given + backend_type. Current values are + relation, which includes permanent relations, and + temp relation which includes temporary relations + created by CREATE TEMPORARY TABLE.... + + + + + + + + read bigint + + + Reads by a given backend_type of a given + io_object into buffers in a given + io_context. 
+ + + Note that the sum of + heap_blks_read, + idx_blks_read, + tidx_blks_read, and + toast_blks_read + in + pg_statio_all_tables as well as + blks_read in + pg_stat_database are both similar to + read plus extended for all + io_contexts for the following + backend_types in pg_stat_io: + + autovacuum launcher + autovacuum worker + client backend + standalone backend + background worker + walsender + + The difference is that reads done as part of CREATE + DATABASE are not counted in + pg_statio_all_tables and + pg_stat_database. + + + + + + + + written bigint + + + Writes by a given backend_type of a given + io_object of data from a given + io_context. + + + Normal client backends should be able to rely on auxiliary processes + like the checkpointer and background writer to write out dirty data as + much as possible. Large numbers of writes by + backend_type client backend in + io_context normal and + io_object relation could indicate + a misconfiguration of shared buffers or of checkpointer. More + information on checkpointer configuration can be found in . + + + Note that the values of written for + backend_type background writer and + backend_type checkpointer + correspond to the values of buffers_clean and + buffers_checkpoint, respectively, in + pg_stat_bgwriter. + buffers_backend in + pg_stat_bgwriter corresponds to + pg_stat_io's written plus + extended for io_contexts + normal, bulkread, + bulkwrite, and vacuum on + io_object relation for + backend_types: + + client backend + autovacuum worker + background worker + walsender + + + + + + + + + extended bigint + + + Extends of relations done by a given backend_type in + order to write data for a given io_object in a given + io_context. + + + + + + + + op_bytes bigint + + + The number of bytes per unit of IO read, written, or extended. For + block-oriented IO of relation data, reads, writes, and extends are done + in block_size units, derived from the build-time + parameter BLCKSZ, which is 8192 by + default. 
+ + + + + + + + evicted bigint + + + Number of times a backend_type has evicted a block + from a shared or local buffer in order to reuse the buffer in this + io_context. Blocks are only evicted when there are no + unoccupied buffers. + + + evicted in io_context + normal and io_object + relation counts the number of times a block from a + shared buffer was evicted so that it can be replaced with another block, + also in shared buffers. + + + A high evicted count in io_context + normal and io_object + relation could indicate that shared buffers is too + small and should be set to a larger value. + + + evicted in io_context + vacuum, bulkread, and + bulkwrite counts the number of times occupied shared + buffers were added to the size-limited strategy ring buffer, causing the + buffer contents to be evicted. If the to-be-used buffer in the ring is + pinned or in use by another backend, it may be replaced by a new shared + buffer. If this shared buffer contains valid data, that block must be + evicted and will count as evicted. + + + Seeing a large number of evicted in strategy + io_contexts can provide insight into primary working + set cache misses. + + + + + + + + reused bigint + + + The number of times an existing buffer in the strategy ring was reused + as part of an operation in the bulkread, + bulkwrite, or vacuum + io_contexts. When a Buffer Access Strategy reuses a + buffer in the strategy ring, it evicts the buffer contents, incrementing + reused. When a Buffer Access Strategy adds a new + shared buffer to the strategy ring and this shared buffer is occupied, + the Buffer Access Strategy must evict the contents of the shared buffer, + incrementing evicted. + + + + + + + + files_synced bigint + + + Number of files fsynced by a given + backend_type for the purpose of persisting data from + a given io_object dirtied in a given + io_context. fsyncs are done at + segment boundaries so op_bytes does not apply to the + files_synced column. 
+ + fsyncs are always tracked in + io_context normal. + + + Normally client backends rely on the checkpointer to ensure data is + persisted to permanent storage. Large numbers of + files_synced by backend_type + client backend could indicate a misconfiguration of + shared buffers or of checkpointer. More information on checkpointer + configuration can be found in . + + + Note that the sum of files_synced for all + io_context normal + io_object relation for all + backend_types except checkpointer + corresponds to buffers_backend_fsync in + pg_stat_bgwriter. + + + + + + + + stats_reset timestamp with time zone + + + Time at which these statistics were last reset. + + + + + +
+ + + Some backend_types do not perform IO operations in some + io_contexts and/or io_objects. These + rows are omitted from the view. For example, the checkpointer does not use + a Buffer Access Strategy, so there will be no rows for + backend_type checkpointer in any of + the strategy io_contexts. + + On a more granular level, some IO operations are invalid in combination + with certain io_contexts and + io_objects. Those cells will be NULL to distinguish + between 0 observed IO operations of that type and an invalid + combination. For example, temporary tables are not fsynced, so cells for + all backend_types for io_object + temp relation in io_context + normal for files_synced will be + NULL. Some backend_types never perform certain IO + operations. Those cells will also be NULL in the view. For example + background writer should not perform reads. +
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 447c9b970f..71646f5aef 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1117,6 +1117,21 @@ CREATE VIEW pg_stat_bgwriter AS pg_stat_get_buf_alloc() AS buffers_alloc, pg_stat_get_bgwriter_stat_reset_time() AS stats_reset; +CREATE VIEW pg_stat_io AS +SELECT + b.backend_type, + b.io_context, + b.io_object, + b.read, + b.written, + b.extended, + b.op_bytes, + b.evicted, + b.reused, + b.files_synced, + b.stats_reset +FROM pg_stat_get_io() b; + CREATE VIEW pg_stat_wal AS SELECT w.wal_records, diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 42b890b806..ad369cd7ec 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1234,6 +1234,148 @@ pg_stat_get_buf_alloc(PG_FUNCTION_ARGS) PG_RETURN_INT64(pgstat_fetch_stat_bgwriter()->buf_alloc); } +/* +* When adding a new column to the pg_stat_io view, add a new enum value +* here above IO_NUM_COLUMNS. +*/ +typedef enum io_stat_col +{ + IO_COL_BACKEND_TYPE, + IO_COL_IO_CONTEXT, + IO_COL_IO_OBJECT, + IO_COL_READS, + IO_COL_WRITES, + IO_COL_EXTENDS, + IO_COL_CONVERSION, + IO_COL_EVICTIONS, + IO_COL_REUSES, + IO_COL_FSYNCS, + IO_COL_RESET_TIME, + IO_NUM_COLUMNS, +} io_stat_col; + +/* + * When adding a new IOOp, add a new io_stat_col and add a case to this + * function returning the corresponding io_stat_col. 
+ */ +static io_stat_col +pgstat_get_io_op_index(IOOp io_op) +{ + switch (io_op) + { + case IOOP_EVICT: + return IO_COL_EVICTIONS; + case IOOP_READ: + return IO_COL_READS; + case IOOP_REUSE: + return IO_COL_REUSES; + case IOOP_WRITE: + return IO_COL_WRITES; + case IOOP_EXTEND: + return IO_COL_EXTENDS; + case IOOP_FSYNC: + return IO_COL_FSYNCS; + } + + elog(ERROR, "unrecognized IOOp value: %d", io_op); + pg_unreachable(); +} + +Datum +pg_stat_get_io(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo; + PgStat_IO *backends_io_stats; + Datum reset_time; + + InitMaterializedSRF(fcinfo, 0); + rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + + backends_io_stats = pgstat_fetch_stat_io(); + + reset_time = TimestampTzGetDatum(backends_io_stats->stat_reset_timestamp); + + for (BackendType bktype = B_INVALID; bktype < BACKEND_NUM_TYPES; bktype++) + { + bool bktype_tracked; + Datum bktype_desc = CStringGetTextDatum(GetBackendTypeDesc(bktype)); + PgStat_IOContextOps *io_context_ops = &backends_io_stats->stats[bktype]; + + /* + * For those BackendTypes without IO Operation stats, skip + * representing them in the view altogether. We still loop through + * their counters so that we can assert that all values are zero. + */ + bktype_tracked = pgstat_tracks_io_bktype(bktype); + + for (IOContext io_context = IOCONTEXT_BULKREAD; + io_context < IOCONTEXT_NUM_TYPES; io_context++) + { + const char *context_name = pgstat_get_io_context_name(io_context); + const PgStat_IOObjectOps *io_objs = &io_context_ops->data[io_context]; + + for (IOObject io_obj = IOOBJECT_RELATION; + io_obj < IOOBJECT_NUM_TYPES; io_obj++) + { + const PgStat_IOOpCounters *counters = &io_objs->data[io_obj]; + const char *obj_name = pgstat_get_io_object_name(io_obj); + + Datum values[IO_NUM_COLUMNS] = {0}; + bool nulls[IO_NUM_COLUMNS] = {0}; + + /* + * Some combinations of IOContext, IOObject, and BackendType + * are not valid for any type of IOOp. In such cases, omit the + * entire row from the view. 
+ */ + if (!bktype_tracked || + !pgstat_tracks_io_object(bktype, io_context, io_obj)) + { + Assert(pgstat_iszero_io_object(counters)); + continue; + } + + values[IO_COL_BACKEND_TYPE] = bktype_desc; + values[IO_COL_IO_CONTEXT] = CStringGetTextDatum(context_name); + values[IO_COL_IO_OBJECT] = CStringGetTextDatum(obj_name); + values[IO_COL_RESET_TIME] = reset_time; /* already converted to a Datum above */ + + /* + * Hard-code this to the value of BLCKSZ for now. Future + * values could include XLOG_BLCKSZ, once WAL IO is tracked, + * and constant multipliers, once non-block-oriented IO (e.g. + * temporary file IO) is tracked. + */ + values[IO_COL_CONVERSION] = Int64GetDatum(BLCKSZ); + + /* + * Some combinations of BackendType and IOOp, of IOContext and + * IOOp, and of IOObject and IOOp are not tracked. Set these + * cells in the view NULL and assert that these stats are zero + * as expected. + */ + for (IOOp io_op = IOOP_EVICT; io_op < IOOP_NUM_TYPES; io_op++) + { + int col_idx = pgstat_get_io_op_index(io_op); + + nulls[col_idx] = !pgstat_tracks_io_op(bktype, io_context, io_obj, io_op); + + if (!nulls[col_idx]) + values[col_idx] = + Int64GetDatum(pgstat_get_io_op_value(counters, io_op)); + else + Assert(pgstat_iszero_io_op(counters, io_op)); + } + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, + values, nulls); + } + } + } + + return (Datum) 0; +} + /* * Returns statistics of WAL activity */ diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 7be9a50147..782f27523f 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5686,6 +5686,15 @@ proname => 'pg_stat_get_buf_alloc', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => '', prosrc => 'pg_stat_get_buf_alloc' }, +{ oid => '8459', descr => 'statistics: per backend type IO statistics', + proname => 'pg_stat_get_io', provolatile => 'v', + prorows => '30', proretset => 't', + proparallel => 'r', prorettype => 'record', proargtypes => '', 
+ proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,timestamptz}', + proargmodes => '{o,o,o,o,o,o,o,o,o,o,o}', + proargnames => '{backend_type,io_context,io_object,read,written,extended,op_bytes,evicted,reused,files_synced,stats_reset}', + prosrc => 'pg_stat_get_io' }, + { oid => '1136', descr => 'statistics: information about WAL activity', proname => 'pg_stat_get_wal', proisstrict => 'f', provolatile => 's', proparallel => 'r', prorettype => 'record', proargtypes => '', diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index fb9f936d43..2d0e7dc5c5 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1876,6 +1876,18 @@ pg_stat_gssapi| SELECT s.pid, s.gss_enc AS encrypted FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, leader_pid, query_id) WHERE (s.client_port IS NOT NULL); +pg_stat_io| SELECT b.backend_type, + b.io_context, + b.io_object, + b.read, + b.written, + b.extended, + b.op_bytes, + b.evicted, + b.reused, + b.files_synced, + b.stats_reset + FROM pg_stat_get_io() b(backend_type, io_context, io_object, read, written, extended, op_bytes, evicted, reused, files_synced, stats_reset); pg_stat_progress_analyze| SELECT s.pid, s.datid, d.datname, diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out index 1d84407a03..01070a53a4 100644 --- a/src/test/regress/expected/stats.out +++ b/src/test/regress/expected/stats.out @@ -1126,4 +1126,229 @@ SELECT pg_stat_get_subscription_stats(NULL); (1 row) +-- Test that the following operations are tracked in pg_stat_io: +-- - reads of target blocks into shared buffers +-- 
- writes of shared buffers to permanent storage +-- - extends of relations using shared buffers +-- - fsyncs done to ensure the durability of data dirtying shared buffers +-- There is no test for blocks evicted from shared buffers, because we cannot +-- be sure of the state of shared buffers at the point the test is run. +-- Create a regular table and insert some data to generate IOCONTEXT_NORMAL +-- extends. +SELECT sum(extended) AS io_sum_shared_extends_before + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +CREATE TABLE test_io_shared(a int); +INSERT INTO test_io_shared SELECT i FROM generate_series(1,100)i; +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT sum(extended) AS io_sum_shared_extends_after + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +SELECT :io_sum_shared_extends_after > :io_sum_shared_extends_before; + ?column? +---------- + t +(1 row) + +-- After a checkpoint, there should be some additional IOCONTEXT_NORMAL writes +-- and fsyncs. +-- The second checkpoint ensures that stats from the first checkpoint have been +-- reported and protects against any potential races amongst the table +-- creation, a possible timing-triggered checkpoint, and the explicit +-- checkpoint in the test. +SELECT sum(written) AS io_sum_shared_writes_before + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +SELECT sum(files_synced) AS io_sum_shared_fsyncs_before + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +CHECKPOINT; +CHECKPOINT; +SELECT sum(written) AS io_sum_shared_writes_after + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +SELECT sum(files_synced) AS io_sum_shared_fsyncs_after + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +SELECT :io_sum_shared_writes_after > :io_sum_shared_writes_before; + ?column? 
+---------- + t +(1 row) + +SELECT current_setting('fsync') = 'off' OR :io_sum_shared_fsyncs_after > :io_sum_shared_fsyncs_before; + ?column? +---------- + t +(1 row) + +-- Change the tablespace so that the table is rewritten directly, then SELECT +-- from it to cause it to be read back into shared buffers. +SET allow_in_place_tablespaces = true; +CREATE TABLESPACE test_io_shared_stats_tblspc LOCATION ''; +SELECT sum(read) AS io_sum_shared_reads_before + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +ALTER TABLE test_io_shared SET TABLESPACE test_io_shared_stats_tblspc; +-- SELECT from the table so that it is read into shared buffers and io_context +-- 'normal', io_object 'relation' reads are counted. +SELECT COUNT(*) FROM test_io_shared; + count +------- + 100 +(1 row) + +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT sum(read) AS io_sum_shared_reads_after + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +SELECT :io_sum_shared_reads_after > :io_sum_shared_reads_before; + ?column? +---------- + t +(1 row) + +DROP TABLE test_io_shared; +DROP TABLESPACE test_io_shared_stats_tblspc; +-- Test that the follow IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io: +-- - eviction of local buffers in order to reuse them +-- - reads of temporary table blocks into local buffers +-- - writes of local buffers to permanent storage +-- - extends of temporary tables +-- Set temp_buffers to a low value so that we can trigger writes with fewer +-- inserted tuples. Do so in a new session in case temporary tables have been +-- accessed by previous tests in this session. 
+\c +SET temp_buffers TO '1MB'; +CREATE TEMPORARY TABLE test_io_local(a int, b TEXT); +SELECT sum(extended) AS io_sum_local_extends_before + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'temp relation' \gset +SELECT sum(evicted) AS io_sum_local_evictions_before + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'temp relation' \gset +SELECT sum(written) AS io_sum_local_writes_before + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'temp relation' \gset +-- Insert tuples into the temporary table, generating extends in the stats. +-- Insert enough values that we need to reuse and write out dirty local +-- buffers, generating evictions and writes. +INSERT INTO test_io_local SELECT generate_series(1, 8000) as id, repeat('a', 100); +SELECT sum(read) AS io_sum_local_reads_before + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'temp relation' \gset +-- Read in evicted buffers, generating reads. +SELECT COUNT(*) FROM test_io_local; + count +------- + 8000 +(1 row) + +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT sum(evicted) AS io_sum_local_evictions_after + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'temp relation' \gset +SELECT sum(read) AS io_sum_local_reads_after + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'temp relation' \gset +SELECT sum(written) AS io_sum_local_writes_after + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'temp relation' \gset +SELECT sum(extended) AS io_sum_local_extends_after + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'temp relation' \gset +SELECT :io_sum_local_evictions_after > :io_sum_local_evictions_before; + ?column? +---------- + t +(1 row) + +SELECT :io_sum_local_reads_after > :io_sum_local_reads_before; + ?column? +---------- + t +(1 row) + +SELECT :io_sum_local_writes_after > :io_sum_local_writes_before; + ?column? 
+---------- + t +(1 row) + +SELECT :io_sum_local_extends_after > :io_sum_local_extends_before; + ?column? +---------- + t +(1 row) + +RESET temp_buffers; +-- Test that reuse of strategy buffers and reads of blocks into these reused +-- buffers while VACUUMing are tracked in pg_stat_io. +-- Set wal_skip_threshold smaller than the expected size of +-- test_io_vac_strategy so that, even if wal_level is minimal, VACUUM FULL will +-- fsync the newly rewritten test_io_vac_strategy instead of writing it to WAL. +-- Writing it to WAL will result in the newly written relation pages being in +-- shared buffers -- preventing us from testing BAS_VACUUM BufferAccessStrategy +-- reads. +SET wal_skip_threshold = '1 kB'; +SELECT sum(reused) AS io_sum_vac_strategy_reuses_before FROM pg_stat_io WHERE io_context = 'vacuum' \gset +SELECT sum(read) AS io_sum_vac_strategy_reads_before FROM pg_stat_io WHERE io_context = 'vacuum' \gset +CREATE TABLE test_io_vac_strategy(a int, b int) WITH (autovacuum_enabled = 'false'); +INSERT INTO test_io_vac_strategy SELECT i, i from generate_series(1, 8000)i; +-- Ensure that the next VACUUM will need to perform IO by rewriting the table +-- first with VACUUM (FULL). +VACUUM (FULL) test_io_vac_strategy; +VACUUM (PARALLEL 0) test_io_vac_strategy; +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT sum(reused) AS io_sum_vac_strategy_reuses_after FROM pg_stat_io WHERE io_context = 'vacuum' \gset +SELECT sum(read) AS io_sum_vac_strategy_reads_after FROM pg_stat_io WHERE io_context = 'vacuum' \gset +SELECT :io_sum_vac_strategy_reads_after > :io_sum_vac_strategy_reads_before; + ?column? +---------- + t +(1 row) + +SELECT :io_sum_vac_strategy_reuses_after > :io_sum_vac_strategy_reuses_before; + ?column? +---------- + t +(1 row) + +RESET wal_skip_threshold; +-- Test that extends done by a CTAS, which uses a BAS_BULKWRITE +-- BufferAccessStrategy, are tracked in pg_stat_io. 
+SELECT sum(extended) AS io_sum_bulkwrite_strategy_extends_before FROM pg_stat_io WHERE io_context = 'bulkwrite' \gset +CREATE TABLE test_io_bulkwrite_strategy AS SELECT i FROM generate_series(1,100)i; +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT sum(extended) AS io_sum_bulkwrite_strategy_extends_after FROM pg_stat_io WHERE io_context = 'bulkwrite' \gset +SELECT :io_sum_bulkwrite_strategy_extends_after > :io_sum_bulkwrite_strategy_extends_before; + ?column? +---------- + t +(1 row) + +-- Test IO stats reset +SELECT sum(evicted) + sum(reused) + sum(extended) + sum(files_synced) + sum(read) + sum(written) AS io_stats_pre_reset FROM pg_stat_io \gset +SELECT pg_stat_reset_shared('io'); + pg_stat_reset_shared +---------------------- + +(1 row) + +SELECT sum(evicted) + sum(reused) + sum(extended) + sum(files_synced) + sum(read) + sum(written) AS io_stats_post_reset FROM pg_stat_io \gset +SELECT :io_stats_post_reset < :io_stats_pre_reset; + ?column? +---------- + t +(1 row) + -- End of Stats Test diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql index b4d6753c71..962ae5b281 100644 --- a/src/test/regress/sql/stats.sql +++ b/src/test/regress/sql/stats.sql @@ -536,4 +536,142 @@ SELECT pg_stat_get_replication_slot(NULL); SELECT pg_stat_get_subscription_stats(NULL); +-- Test that the following operations are tracked in pg_stat_io: +-- - reads of target blocks into shared buffers +-- - writes of shared buffers to permanent storage +-- - extends of relations using shared buffers +-- - fsyncs done to ensure the durability of data dirtying shared buffers + +-- There is no test for blocks evicted from shared buffers, because we cannot +-- be sure of the state of shared buffers at the point the test is run. + +-- Create a regular table and insert some data to generate IOCONTEXT_NORMAL +-- extends. 
+SELECT sum(extended) AS io_sum_shared_extends_before + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +CREATE TABLE test_io_shared(a int); +INSERT INTO test_io_shared SELECT i FROM generate_series(1,100)i; +SELECT pg_stat_force_next_flush(); +SELECT sum(extended) AS io_sum_shared_extends_after + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +SELECT :io_sum_shared_extends_after > :io_sum_shared_extends_before; + +-- After a checkpoint, there should be some additional IOCONTEXT_NORMAL writes +-- and fsyncs. +-- The second checkpoint ensures that stats from the first checkpoint have been +-- reported and protects against any potential races amongst the table +-- creation, a possible timing-triggered checkpoint, and the explicit +-- checkpoint in the test. +SELECT sum(written) AS io_sum_shared_writes_before + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +SELECT sum(files_synced) AS io_sum_shared_fsyncs_before + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +CHECKPOINT; +CHECKPOINT; +SELECT sum(written) AS io_sum_shared_writes_after + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +SELECT sum(files_synced) AS io_sum_shared_fsyncs_after + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset + +SELECT :io_sum_shared_writes_after > :io_sum_shared_writes_before; +SELECT current_setting('fsync') = 'off' OR :io_sum_shared_fsyncs_after > :io_sum_shared_fsyncs_before; + +-- Change the tablespace so that the table is rewritten directly, then SELECT +-- from it to cause it to be read back into shared buffers. 
+SET allow_in_place_tablespaces = true; +CREATE TABLESPACE test_io_shared_stats_tblspc LOCATION ''; +SELECT sum(read) AS io_sum_shared_reads_before + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +ALTER TABLE test_io_shared SET TABLESPACE test_io_shared_stats_tblspc; +-- SELECT from the table so that it is read into shared buffers and io_context +-- 'normal', io_object 'relation' reads are counted. +SELECT COUNT(*) FROM test_io_shared; +SELECT pg_stat_force_next_flush(); +SELECT sum(read) AS io_sum_shared_reads_after + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +SELECT :io_sum_shared_reads_after > :io_sum_shared_reads_before; +DROP TABLE test_io_shared; +DROP TABLESPACE test_io_shared_stats_tblspc; + +-- Test that the follow IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io: +-- - eviction of local buffers in order to reuse them +-- - reads of temporary table blocks into local buffers +-- - writes of local buffers to permanent storage +-- - extends of temporary tables + +-- Set temp_buffers to a low value so that we can trigger writes with fewer +-- inserted tuples. Do so in a new session in case temporary tables have been +-- accessed by previous tests in this session. +\c +SET temp_buffers TO '1MB'; +CREATE TEMPORARY TABLE test_io_local(a int, b TEXT); +SELECT sum(extended) AS io_sum_local_extends_before + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'temp relation' \gset +SELECT sum(evicted) AS io_sum_local_evictions_before + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'temp relation' \gset +SELECT sum(written) AS io_sum_local_writes_before + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'temp relation' \gset +-- Insert tuples into the temporary table, generating extends in the stats. +-- Insert enough values that we need to reuse and write out dirty local +-- buffers, generating evictions and writes. 
+INSERT INTO test_io_local SELECT generate_series(1, 8000) as id, repeat('a', 100); + +SELECT sum(read) AS io_sum_local_reads_before + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'temp relation' \gset +-- Read in evicted buffers, generating reads. +SELECT COUNT(*) FROM test_io_local; +SELECT pg_stat_force_next_flush(); +SELECT sum(evicted) AS io_sum_local_evictions_after + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'temp relation' \gset +SELECT sum(read) AS io_sum_local_reads_after + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'temp relation' \gset +SELECT sum(written) AS io_sum_local_writes_after + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'temp relation' \gset +SELECT sum(extended) AS io_sum_local_extends_after + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'temp relation' \gset +SELECT :io_sum_local_evictions_after > :io_sum_local_evictions_before; +SELECT :io_sum_local_reads_after > :io_sum_local_reads_before; +SELECT :io_sum_local_writes_after > :io_sum_local_writes_before; +SELECT :io_sum_local_extends_after > :io_sum_local_extends_before; +RESET temp_buffers; + +-- Test that reuse of strategy buffers and reads of blocks into these reused +-- buffers while VACUUMing are tracked in pg_stat_io. + +-- Set wal_skip_threshold smaller than the expected size of +-- test_io_vac_strategy so that, even if wal_level is minimal, VACUUM FULL will +-- fsync the newly rewritten test_io_vac_strategy instead of writing it to WAL. +-- Writing it to WAL will result in the newly written relation pages being in +-- shared buffers -- preventing us from testing BAS_VACUUM BufferAccessStrategy +-- reads. 
+SET wal_skip_threshold = '1 kB'; +SELECT sum(reused) AS io_sum_vac_strategy_reuses_before FROM pg_stat_io WHERE io_context = 'vacuum' \gset +SELECT sum(read) AS io_sum_vac_strategy_reads_before FROM pg_stat_io WHERE io_context = 'vacuum' \gset +CREATE TABLE test_io_vac_strategy(a int, b int) WITH (autovacuum_enabled = 'false'); +INSERT INTO test_io_vac_strategy SELECT i, i from generate_series(1, 8000)i; +-- Ensure that the next VACUUM will need to perform IO by rewriting the table +-- first with VACUUM (FULL). +VACUUM (FULL) test_io_vac_strategy; +VACUUM (PARALLEL 0) test_io_vac_strategy; +SELECT pg_stat_force_next_flush(); +SELECT sum(reused) AS io_sum_vac_strategy_reuses_after FROM pg_stat_io WHERE io_context = 'vacuum' \gset +SELECT sum(read) AS io_sum_vac_strategy_reads_after FROM pg_stat_io WHERE io_context = 'vacuum' \gset +SELECT :io_sum_vac_strategy_reads_after > :io_sum_vac_strategy_reads_before; +SELECT :io_sum_vac_strategy_reuses_after > :io_sum_vac_strategy_reuses_before; +RESET wal_skip_threshold; + +-- Test that extends done by a CTAS, which uses a BAS_BULKWRITE +-- BufferAccessStrategy, are tracked in pg_stat_io. 
+SELECT sum(extended) AS io_sum_bulkwrite_strategy_extends_before FROM pg_stat_io WHERE io_context = 'bulkwrite' \gset +CREATE TABLE test_io_bulkwrite_strategy AS SELECT i FROM generate_series(1,100)i; +SELECT pg_stat_force_next_flush(); +SELECT sum(extended) AS io_sum_bulkwrite_strategy_extends_after FROM pg_stat_io WHERE io_context = 'bulkwrite' \gset +SELECT :io_sum_bulkwrite_strategy_extends_after > :io_sum_bulkwrite_strategy_extends_before; + +-- Test IO stats reset +SELECT sum(evicted) + sum(reused) + sum(extended) + sum(files_synced) + sum(read) + sum(written) AS io_stats_pre_reset FROM pg_stat_io \gset +SELECT pg_stat_reset_shared('io'); +SELECT sum(evicted) + sum(reused) + sum(extended) + sum(files_synced) + sum(read) + sum(written) AS io_stats_post_reset FROM pg_stat_io \gset +SELECT :io_stats_post_reset < :io_stats_pre_reset; + -- End of Stats Test diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 9336bf9796..ae871165cf 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3372,6 +3372,7 @@ intset_internal_node intset_leaf_node intset_node intvKEY +io_stat_col itemIdCompact itemIdCompactData iterator -- 2.38.1