From 879cce732348d599445802f00e8650ee521fb239 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Mon, 9 Jan 2023 14:42:25 -0500 Subject: [PATCH v48 3/4] Add system view tracking IO ops per backend type Add pg_stat_io, a system view which tracks the number of IOOps (evictions, reuses, reads, writes, extensions, and fsyncs) done on each IOObject (relation, temp relation) in each IOContext ("normal" and those using a BufferAccessStrategy) by each type of backend (e.g. client backend, checkpointer). Some BackendTypes do not accumulate IO operations statistics and will not be included in the view. Some IOContexts are not used by some BackendTypes and will not be in the view. For example, checkpointer does not use a BufferAccessStrategy (currently), so there will be no rows for BufferAccessStrategy IOContexts for checkpointer. Some IOObjects are never operated on in some IOContexts or by some BackendTypes. These rows are omitted from the view. For example, checkpointer will never operate on IOOBJECT_TEMP_RELATION data, so those rows are omitted. Some IOOps are invalid in combination with certain IOContexts and certain IOObjects. Those cells will be NULL in the view to distinguish between 0 observed IOOps of that type and an invalid combination. For example, temporary tables are not fsynced so cells for all BackendTypes for IOOBJECT_TEMP_RELATION and IOOP_FSYNC will be NULL. Some BackendTypes never perform certain IOOps. Those cells will also be NULL in the view. For example, bgwriter should not perform reads. View stats are populated with statistics incremented when a backend performs an IO Operation and maintained by the cumulative statistics subsystem. Each row of the view shows stats for a particular BackendType, IOObject, IOContext combination (e.g. a client backend's operations on permanent relations in shared buffers) and each column in the view is the total number of IO Operations done (e.g. writes). 
So a cell in the view would be, for example, the number of blocks of relation data written from shared buffers by client backends since the last stats reset. In anticipation of tracking WAL IO and non-block-oriented IO (such as temporary file IO), the "op_bytes" column specifies the unit of the "reads", "writes", and "extends" columns for a given row. Note that some of the cells in the view are redundant with fields in pg_stat_bgwriter (e.g. buffers_backend), however these have been kept in pg_stat_bgwriter for backwards compatibility. Deriving the redundant pg_stat_bgwriter stats from the IO operations stats structures was also problematic due to the separate reset targets for 'bgwriter' and 'io'. Suggested by Andres Freund Catalog version should be bumped. Author: Melanie Plageman Reviewed-by: Andres Freund Discussion: https://www.postgresql.org/message-id/flat/20200124195226.lth52iydq2n2uilq%40alap3.anarazel.de --- contrib/amcheck/expected/check_heap.out | 34 ++++ contrib/amcheck/sql/check_heap.sql | 27 +++ src/backend/catalog/system_views.sql | 15 ++ src/backend/utils/adt/pgstatfuncs.c | 140 ++++++++++++++ src/include/catalog/pg_proc.dat | 9 + src/test/regress/expected/rules.out | 12 ++ src/test/regress/expected/stats.out | 234 ++++++++++++++++++++++++ src/test/regress/sql/stats.sql | 148 +++++++++++++++ src/tools/pgindent/typedefs.list | 1 + 9 files changed, 620 insertions(+) diff --git a/contrib/amcheck/expected/check_heap.out b/contrib/amcheck/expected/check_heap.out index c010361025..e4785141a6 100644 --- a/contrib/amcheck/expected/check_heap.out +++ b/contrib/amcheck/expected/check_heap.out @@ -66,6 +66,22 @@ SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'ALL-VISIBLE'); INSERT INTO heaptest (a, b) (SELECT gs, repeat('x', gs) FROM generate_series(1,50) gs); +-- pg_stat_io test: +-- verify_heapam always uses a BAS_BULKREAD BufferAccessStrategy, whereas a +-- sequential scan does so only if the table is large enough when compared to +-- shared 
buffers (see initscan()). CREATE DATABASE ... also unconditionally +-- uses a BAS_BULKREAD strategy, but we have chosen to use a tablespace and +-- verify_heapam to provide coverage instead of adding another expensive +-- operation to the main regression test suite. +-- +-- Create an alternative tablespace and move the heaptest table to it, causing +-- it to be rewritten and all the blocks to be reliably evicted from shared +-- buffers -- guaranteeing actual reads when we next select from it. +SET allow_in_place_tablespaces = true; +CREATE TABLESPACE regress_test_stats_tblspc LOCATION ''; +SELECT sum(reads) AS stats_bulkreads_before + FROM pg_stat_io WHERE io_context = 'bulkread' \gset +ALTER TABLE heaptest SET TABLESPACE regress_test_stats_tblspc; -- Check that valid options are not rejected nor corruption reported -- for a non-empty table SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none'); @@ -88,6 +104,23 @@ SELECT * FROM verify_heapam(relation := 'heaptest', startblock := 0, endblock := -------+--------+--------+----- (0 rows) +-- verify_heapam should have read in the page written out by +-- ALTER TABLE ... SET TABLESPACE ... +-- causing an additional bulkread, which should be reflected in pg_stat_io. +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT sum(reads) AS stats_bulkreads_after + FROM pg_stat_io WHERE io_context = 'bulkread' \gset +SELECT :stats_bulkreads_after > :stats_bulkreads_before; + ?column? +---------- + t +(1 row) + CREATE ROLE regress_heaptest_role; -- verify permissions are checked (error due to function not callable) SET ROLE regress_heaptest_role; @@ -195,6 +228,7 @@ ERROR: cannot check relation "test_foreign_table" DETAIL: This operation is not supported for foreign tables. 
-- cleanup DROP TABLE heaptest; +DROP TABLESPACE regress_test_stats_tblspc; DROP TABLE test_partition; DROP TABLE test_partitioned; DROP OWNED BY regress_heaptest_role; -- permissions diff --git a/contrib/amcheck/sql/check_heap.sql b/contrib/amcheck/sql/check_heap.sql index 298de6886a..6794ca4eb0 100644 --- a/contrib/amcheck/sql/check_heap.sql +++ b/contrib/amcheck/sql/check_heap.sql @@ -20,11 +20,29 @@ SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'NONE'); SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'ALL-FROZEN'); SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'ALL-VISIBLE'); + -- Add some data so subsequent tests are not entirely trivial INSERT INTO heaptest (a, b) (SELECT gs, repeat('x', gs) FROM generate_series(1,50) gs); +-- pg_stat_io test: +-- verify_heapam always uses a BAS_BULKREAD BufferAccessStrategy, whereas a +-- sequential scan does so only if the table is large enough when compared to +-- shared buffers (see initscan()). CREATE DATABASE ... also unconditionally +-- uses a BAS_BULKREAD strategy, but we have chosen to use a tablespace and +-- verify_heapam to provide coverage instead of adding another expensive +-- operation to the main regression test suite. +-- +-- Create an alternative tablespace and move the heaptest table to it, causing +-- it to be rewritten and all the blocks to be reliably evicted from shared +-- buffers -- guaranteeing actual reads when we next select from it. 
+SET allow_in_place_tablespaces = true; +CREATE TABLESPACE regress_test_stats_tblspc LOCATION ''; +SELECT sum(reads) AS stats_bulkreads_before + FROM pg_stat_io WHERE io_context = 'bulkread' \gset +ALTER TABLE heaptest SET TABLESPACE regress_test_stats_tblspc; + -- Check that valid options are not rejected nor corruption reported -- for a non-empty table SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none'); @@ -32,6 +50,14 @@ SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'all-frozen'); SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'all-visible'); SELECT * FROM verify_heapam(relation := 'heaptest', startblock := 0, endblock := 0); +-- verify_heapam should have read in the page written out by +-- ALTER TABLE ... SET TABLESPACE ... +-- causing an additional bulkread, which should be reflected in pg_stat_io. +SELECT pg_stat_force_next_flush(); +SELECT sum(reads) AS stats_bulkreads_after + FROM pg_stat_io WHERE io_context = 'bulkread' \gset +SELECT :stats_bulkreads_after > :stats_bulkreads_before; + CREATE ROLE regress_heaptest_role; -- verify permissions are checked (error due to function not callable) @@ -110,6 +136,7 @@ SELECT * FROM verify_heapam('test_foreign_table', -- cleanup DROP TABLE heaptest; +DROP TABLESPACE regress_test_stats_tblspc; DROP TABLE test_partition; DROP TABLE test_partitioned; DROP OWNED BY regress_heaptest_role; -- permissions diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index d2a8c82900..f875742068 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1116,6 +1116,21 @@ CREATE VIEW pg_stat_bgwriter AS pg_stat_get_buf_alloc() AS buffers_alloc, pg_stat_get_bgwriter_stat_reset_time() AS stats_reset; +CREATE VIEW pg_stat_io AS +SELECT + b.backend_type, + b.io_context, + b.io_object, + b.reads, + b.writes, + b.extends, + b.op_bytes, + b.evictions, + b.reuses, + b.fsyncs, + b.stats_reset +FROM pg_stat_get_io() b; + CREATE 
VIEW pg_stat_wal AS SELECT w.wal_records, diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 6df9f06a20..284bb2a698 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1234,6 +1234,146 @@ pg_stat_get_buf_alloc(PG_FUNCTION_ARGS) PG_RETURN_INT64(pgstat_fetch_stat_bgwriter()->buf_alloc); } +/* + * When adding a new column to the pg_stat_io view, add a new enum value + * here above IO_NUM_COLUMNS. + */ +typedef enum io_stat_col +{ + IO_COL_BACKEND_TYPE, + IO_COL_IO_CONTEXT, + IO_COL_IO_OBJECT, + IO_COL_READS, + IO_COL_WRITES, + IO_COL_EXTENDS, + IO_COL_CONVERSION, + IO_COL_EVICTIONS, + IO_COL_REUSES, + IO_COL_FSYNCS, + IO_COL_RESET_TIME, + IO_NUM_COLUMNS, +} io_stat_col; + +/* + * When adding a new IOOp, add a new io_stat_col and add a case to this + * function returning the corresponding io_stat_col. + */ +static io_stat_col +pgstat_get_io_op_index(IOOp io_op) +{ + switch (io_op) + { + case IOOP_EVICT: + return IO_COL_EVICTIONS; + case IOOP_READ: + return IO_COL_READS; + case IOOP_REUSE: + return IO_COL_REUSES; + case IOOP_WRITE: + return IO_COL_WRITES; + case IOOP_EXTEND: + return IO_COL_EXTENDS; + case IOOP_FSYNC: + return IO_COL_FSYNCS; + } + + elog(ERROR, "unrecognized IOOp value: %d", io_op); + pg_unreachable(); +} + +Datum +pg_stat_get_io(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo; + PgStat_IO *backends_io_stats; + Datum reset_time; + + InitMaterializedSRF(fcinfo, 0); + rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + + backends_io_stats = pgstat_fetch_stat_io(); + + reset_time = TimestampTzGetDatum(backends_io_stats->stat_reset_timestamp); + + for (BackendType bktype = B_INVALID; bktype < BACKEND_NUM_TYPES; bktype++) + { + Datum bktype_desc = CStringGetTextDatum(GetBackendTypeDesc(bktype)); + PgStat_BktypeIO *bktype_stats = &backends_io_stats->stats[bktype]; + + /* + * In Assert builds, we can afford an extra loop through all of the + * counters checking that only expected stats 
are non-zero, since it + * keeps the non-Assert code cleaner. + */ + Assert(pgstat_bktype_io_stats_valid(bktype_stats, bktype)); + + /* + * For those BackendTypes without IO Operation stats, skip + * representing them in the view altogether. + */ + if (!pgstat_tracks_io_bktype(bktype)) + continue; + + for (IOContext io_context = IOCONTEXT_FIRST; + io_context < IOCONTEXT_NUM_TYPES; io_context++) + { + const char *context_name = pgstat_get_io_context_name(io_context); + + for (IOObject io_obj = IOOBJECT_FIRST; + io_obj < IOOBJECT_NUM_TYPES; io_obj++) + { + const char *obj_name = pgstat_get_io_object_name(io_obj); + + Datum values[IO_NUM_COLUMNS] = {0}; + bool nulls[IO_NUM_COLUMNS] = {0}; + + /* + * Some combinations of IOContext, IOObject, and BackendType + * are not valid for any type of IOOp. In such cases, omit the + * entire row from the view. + */ + if (!pgstat_tracks_io_object(bktype, io_context, io_obj)) + continue; + + values[IO_COL_BACKEND_TYPE] = bktype_desc; + values[IO_COL_IO_CONTEXT] = CStringGetTextDatum(context_name); + values[IO_COL_IO_OBJECT] = CStringGetTextDatum(obj_name); + values[IO_COL_RESET_TIME] = reset_time; /* already a Datum */ + + /* + * Hard-code this to the value of BLCKSZ for now. Future + * values could include XLOG_BLCKSZ, once WAL IO is tracked, + * and constant multipliers, once non-block-oriented IO (e.g. + * temporary file IO) is tracked. + */ + values[IO_COL_CONVERSION] = Int64GetDatum(BLCKSZ); + for (IOOp io_op = IOOP_FIRST; io_op < IOOP_NUM_TYPES; io_op++) + { + int col_idx = pgstat_get_io_op_index(io_op); + + /* + * Some combinations of BackendType and IOOp, of IOContext + * and IOOp, and of IOObject and IOOp are not tracked. Set + * these cells in the view NULL. 
+ */ + nulls[col_idx] = !pgstat_tracks_io_op(bktype, io_context, io_obj, io_op); + + if (nulls[col_idx]) + continue; + + values[col_idx] = + Int64GetDatum(bktype_stats->data[io_context][io_obj][io_op]); + } + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, + values, nulls); + } + } + } + + return (Datum) 0; +} + /* * Returns statistics of WAL activity */ diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 3810de7b22..57a889cf49 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5690,6 +5690,15 @@ proname => 'pg_stat_get_buf_alloc', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => '', prosrc => 'pg_stat_get_buf_alloc' }, +{ oid => '8459', descr => 'statistics: per backend type IO statistics', + proname => 'pg_stat_get_io', provolatile => 'v', + prorows => '30', proretset => 't', + proparallel => 'r', prorettype => 'record', proargtypes => '', + proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,timestamptz}', + proargmodes => '{o,o,o,o,o,o,o,o,o,o,o}', + proargnames => '{backend_type,io_context,io_object,reads,writes,extends,op_bytes,evictions,reuses,fsyncs,stats_reset}', + prosrc => 'pg_stat_get_io' }, + { oid => '1136', descr => 'statistics: information about WAL activity', proname => 'pg_stat_get_wal', proisstrict => 'f', provolatile => 's', proparallel => 'r', prorettype => 'record', proargtypes => '', diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index a969ae63eb..dd5ddffc4d 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1876,6 +1876,18 @@ pg_stat_gssapi| SELECT s.pid, s.gss_enc AS encrypted FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, 
backend_type, ssl, sslversion, sslcipher, sslbits, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, leader_pid, query_id) WHERE (s.client_port IS NOT NULL); +pg_stat_io| SELECT b.backend_type, + b.io_context, + b.io_object, + b.reads, + b.writes, + b.extends, + b.op_bytes, + b.evictions, + b.reuses, + b.fsyncs, + b.stats_reset + FROM pg_stat_get_io() b(backend_type, io_context, io_object, reads, writes, extends, op_bytes, evictions, reuses, fsyncs, stats_reset); pg_stat_progress_analyze| SELECT s.pid, s.datid, d.datname, diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out index 1d84407a03..3bd4e66fa8 100644 --- a/src/test/regress/expected/stats.out +++ b/src/test/regress/expected/stats.out @@ -1126,4 +1126,238 @@ SELECT pg_stat_get_subscription_stats(NULL); (1 row) +-- Test that the following operations are tracked in pg_stat_io: +-- - reads of target blocks into shared buffers +-- - writes of shared buffers to permanent storage +-- - extends of relations using shared buffers +-- - fsyncs done to ensure the durability of data dirtying shared buffers +-- There is no test for blocks evicted from shared buffers, because we cannot +-- be sure of the state of shared buffers at the point the test is run. +-- Create a regular table and insert some data to generate IOCONTEXT_NORMAL +-- extends. +SELECT sum(extends) AS io_sum_shared_before_extends + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +CREATE TABLE test_io_shared(a int); +INSERT INTO test_io_shared SELECT i FROM generate_series(1,100)i; +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT sum(extends) AS io_sum_shared_after_extends + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +SELECT :io_sum_shared_after_extends > :io_sum_shared_before_extends; + ?column? 
+---------- + t +(1 row) + +-- After a checkpoint, there should be some additional IOCONTEXT_NORMAL writes +-- and fsyncs. +SELECT sum(writes) AS writes, sum(fsyncs) AS fsyncs + FROM pg_stat_io + WHERE io_context = 'normal' AND io_object = 'relation' \gset io_sum_shared_before_ +-- See comment above for rationale for two explicit CHECKPOINTs. +CHECKPOINT; +CHECKPOINT; +SELECT sum(writes) AS writes, sum(fsyncs) AS fsyncs + FROM pg_stat_io + WHERE io_context = 'normal' AND io_object = 'relation' \gset io_sum_shared_after_ +SELECT :io_sum_shared_after_writes > :io_sum_shared_before_writes; + ?column? +---------- + t +(1 row) + +SELECT current_setting('fsync') = 'off' + OR :io_sum_shared_after_fsyncs > :io_sum_shared_before_fsyncs; + ?column? +---------- + t +(1 row) + +-- Change the tablespace so that the table is rewritten directly, then SELECT +-- from it to cause it to be read back into shared buffers. +SET allow_in_place_tablespaces = true; +CREATE TABLESPACE regress_io_stats_tblspc LOCATION ''; +SELECT sum(reads) AS io_sum_shared_before_reads + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +ALTER TABLE test_io_shared SET TABLESPACE regress_io_stats_tblspc; +-- SELECT from the table so that the data is read into shared buffers and +-- io_context 'normal', io_object 'relation' reads are counted. +SELECT COUNT(*) FROM test_io_shared; + count +------- + 100 +(1 row) + +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT sum(reads) AS io_sum_shared_after_reads + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads; + ?column? +---------- + t +(1 row) + +-- Drop the table so we can drop the tablespace later. 
+DROP TABLE test_io_shared; +-- Test that the follow IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io: +-- - eviction of local buffers in order to reuse them +-- - reads of temporary table blocks into local buffers +-- - writes of local buffers to permanent storage +-- - extends of temporary tables +-- Set temp_buffers to its minimum so that we can trigger writes with fewer +-- inserted tuples. Do so in a new session in case temporary tables have been +-- accessed by previous tests in this session. +\c +SET temp_buffers TO 100; +CREATE TEMPORARY TABLE test_io_local(a int, b TEXT); +SELECT sum(extends) AS extends, sum(evictions) AS evictions, sum(writes) AS writes + FROM pg_stat_io + WHERE io_context = 'normal' AND io_object = 'temp relation' \gset io_sum_local_before_ +-- Insert tuples into the temporary table, generating extends in the stats. +-- Insert enough values that we need to reuse and write out dirty local +-- buffers, generating evictions and writes. +INSERT INTO test_io_local SELECT generate_series(1, 5000) as id, repeat('a', 200); +-- Ensure the table is large enough to exceed our temp_buffers setting. +SELECT pg_table_size('test_io_local') / current_setting('block_size')::int8 > 100; + ?column? +---------- + t +(1 row) + +SELECT sum(reads) AS io_sum_local_before_reads + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'temp relation' \gset +-- Read in evicted buffers, generating reads. 
+SELECT COUNT(*) FROM test_io_local; + count +------- + 5000 +(1 row) + +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT sum(evictions) AS evictions, + sum(reads) AS reads, + sum(writes) AS writes, + sum(extends) AS extends + FROM pg_stat_io + WHERE io_context = 'normal' AND io_object = 'temp relation' \gset io_sum_local_after_ +SELECT :io_sum_local_after_evictions > :io_sum_local_before_evictions, + :io_sum_local_after_reads > :io_sum_local_before_reads, + :io_sum_local_after_writes > :io_sum_local_before_writes, + :io_sum_local_after_extends > :io_sum_local_before_extends; + ?column? | ?column? | ?column? | ?column? +----------+----------+----------+---------- + t | t | t | t +(1 row) + +-- Change the tablespaces so that the temporary table is rewritten to other +-- local buffers, exercising a different codepath than standard local buffer +-- writes. +ALTER TABLE test_io_local SET TABLESPACE regress_io_stats_tblspc; +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT sum(writes) AS io_sum_local_new_tblspc_writes + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'temp relation' \gset +SELECT :io_sum_local_new_tblspc_writes > :io_sum_local_after_writes; + ?column? +---------- + t +(1 row) + +-- Drop the table so we can drop the tablespace later. +DROP TABLE test_io_local; +RESET temp_buffers; +DROP TABLESPACE regress_io_stats_tblspc; +-- Test that reuse of strategy buffers and reads of blocks into these reused +-- buffers while VACUUMing are tracked in pg_stat_io. +-- Set wal_skip_threshold smaller than the expected size of +-- test_io_vac_strategy so that, even if wal_level is minimal, VACUUM FULL will +-- fsync the newly rewritten test_io_vac_strategy instead of writing it to WAL. 
+-- Writing it to WAL will result in the newly written relation pages being in +-- shared buffers -- preventing us from testing BAS_VACUUM BufferAccessStrategy +-- reads. +SET wal_skip_threshold = '1 kB'; +SELECT sum(reuses) AS reuses, sum(reads) AS reads + FROM pg_stat_io WHERE io_context = 'vacuum' \gset io_sum_vac_strategy_before_ +CREATE TABLE test_io_vac_strategy(a int, b int) WITH (autovacuum_enabled = 'false'); +INSERT INTO test_io_vac_strategy SELECT i, i from generate_series(1, 8000)i; +-- Ensure that the next VACUUM will need to perform IO by rewriting the table +-- first with VACUUM (FULL). +VACUUM (FULL) test_io_vac_strategy; +VACUUM (PARALLEL 0) test_io_vac_strategy; +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT sum(reuses) AS reuses, sum(reads) AS reads + FROM pg_stat_io WHERE io_context = 'vacuum' \gset io_sum_vac_strategy_after_ +SELECT :io_sum_vac_strategy_after_reads > :io_sum_vac_strategy_before_reads, + :io_sum_vac_strategy_after_reuses > :io_sum_vac_strategy_before_reuses; + ?column? | ?column? +----------+---------- + t | t +(1 row) + +RESET wal_skip_threshold; +-- Test that extends done by a CTAS, which uses a BAS_BULKWRITE +-- BufferAccessStrategy, are tracked in pg_stat_io. +SELECT sum(extends) AS io_sum_bulkwrite_strategy_extends_before + FROM pg_stat_io WHERE io_context = 'bulkwrite' \gset +CREATE TABLE test_io_bulkwrite_strategy AS SELECT i FROM generate_series(1,100)i; +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT sum(extends) AS io_sum_bulkwrite_strategy_extends_after + FROM pg_stat_io WHERE io_context = 'bulkwrite' \gset +SELECT :io_sum_bulkwrite_strategy_extends_after > :io_sum_bulkwrite_strategy_extends_before; + ?column? 
+---------- + t +(1 row) + +-- Test IO stats reset +SELECT pg_stat_have_stats('io', 0, 0); + pg_stat_have_stats +-------------------- + t +(1 row) + +SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_pre_reset + FROM pg_stat_io \gset +SELECT pg_stat_reset_shared('io'); + pg_stat_reset_shared +---------------------- + +(1 row) + +SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_post_reset + FROM pg_stat_io \gset +SELECT :io_stats_post_reset < :io_stats_pre_reset; + ?column? +---------- + t +(1 row) + -- End of Stats Test diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql index b4d6753c71..163ed38faf 100644 --- a/src/test/regress/sql/stats.sql +++ b/src/test/regress/sql/stats.sql @@ -536,4 +536,152 @@ SELECT pg_stat_get_replication_slot(NULL); SELECT pg_stat_get_subscription_stats(NULL); +-- Test that the following operations are tracked in pg_stat_io: +-- - reads of target blocks into shared buffers +-- - writes of shared buffers to permanent storage +-- - extends of relations using shared buffers +-- - fsyncs done to ensure the durability of data dirtying shared buffers + +-- There is no test for blocks evicted from shared buffers, because we cannot +-- be sure of the state of shared buffers at the point the test is run. + +-- Create a regular table and insert some data to generate IOCONTEXT_NORMAL +-- extends. 
+SELECT sum(extends) AS io_sum_shared_before_extends + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +CREATE TABLE test_io_shared(a int); +INSERT INTO test_io_shared SELECT i FROM generate_series(1,100)i; +SELECT pg_stat_force_next_flush(); +SELECT sum(extends) AS io_sum_shared_after_extends + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +SELECT :io_sum_shared_after_extends > :io_sum_shared_before_extends; + +-- After a checkpoint, there should be some additional IOCONTEXT_NORMAL writes +-- and fsyncs. +SELECT sum(writes) AS writes, sum(fsyncs) AS fsyncs + FROM pg_stat_io + WHERE io_context = 'normal' AND io_object = 'relation' \gset io_sum_shared_before_ +-- See comment above for rationale for two explicit CHECKPOINTs. +CHECKPOINT; +CHECKPOINT; +SELECT sum(writes) AS writes, sum(fsyncs) AS fsyncs + FROM pg_stat_io + WHERE io_context = 'normal' AND io_object = 'relation' \gset io_sum_shared_after_ + +SELECT :io_sum_shared_after_writes > :io_sum_shared_before_writes; +SELECT current_setting('fsync') = 'off' + OR :io_sum_shared_after_fsyncs > :io_sum_shared_before_fsyncs; + +-- Change the tablespace so that the table is rewritten directly, then SELECT +-- from it to cause it to be read back into shared buffers. +SET allow_in_place_tablespaces = true; +CREATE TABLESPACE regress_io_stats_tblspc LOCATION ''; +SELECT sum(reads) AS io_sum_shared_before_reads + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +ALTER TABLE test_io_shared SET TABLESPACE regress_io_stats_tblspc; +-- SELECT from the table so that the data is read into shared buffers and +-- io_context 'normal', io_object 'relation' reads are counted. 
+SELECT COUNT(*) FROM test_io_shared; +SELECT pg_stat_force_next_flush(); +SELECT sum(reads) AS io_sum_shared_after_reads + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads; +-- Drop the table so we can drop the tablespace later. +DROP TABLE test_io_shared; + +-- Test that the follow IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io: +-- - eviction of local buffers in order to reuse them +-- - reads of temporary table blocks into local buffers +-- - writes of local buffers to permanent storage +-- - extends of temporary tables + +-- Set temp_buffers to its minimum so that we can trigger writes with fewer +-- inserted tuples. Do so in a new session in case temporary tables have been +-- accessed by previous tests in this session. +\c +SET temp_buffers TO 100; +CREATE TEMPORARY TABLE test_io_local(a int, b TEXT); +SELECT sum(extends) AS extends, sum(evictions) AS evictions, sum(writes) AS writes + FROM pg_stat_io + WHERE io_context = 'normal' AND io_object = 'temp relation' \gset io_sum_local_before_ +-- Insert tuples into the temporary table, generating extends in the stats. +-- Insert enough values that we need to reuse and write out dirty local +-- buffers, generating evictions and writes. +INSERT INTO test_io_local SELECT generate_series(1, 5000) as id, repeat('a', 200); +-- Ensure the table is large enough to exceed our temp_buffers setting. +SELECT pg_table_size('test_io_local') / current_setting('block_size')::int8 > 100; + +SELECT sum(reads) AS io_sum_local_before_reads + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'temp relation' \gset +-- Read in evicted buffers, generating reads. 
+SELECT COUNT(*) FROM test_io_local; +SELECT pg_stat_force_next_flush(); +SELECT sum(evictions) AS evictions, + sum(reads) AS reads, + sum(writes) AS writes, + sum(extends) AS extends + FROM pg_stat_io + WHERE io_context = 'normal' AND io_object = 'temp relation' \gset io_sum_local_after_ +SELECT :io_sum_local_after_evictions > :io_sum_local_before_evictions, + :io_sum_local_after_reads > :io_sum_local_before_reads, + :io_sum_local_after_writes > :io_sum_local_before_writes, + :io_sum_local_after_extends > :io_sum_local_before_extends; + +-- Change the tablespaces so that the temporary table is rewritten to other +-- local buffers, exercising a different codepath than standard local buffer +-- writes. +ALTER TABLE test_io_local SET TABLESPACE regress_io_stats_tblspc; +SELECT pg_stat_force_next_flush(); +SELECT sum(writes) AS io_sum_local_new_tblspc_writes + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'temp relation' \gset +SELECT :io_sum_local_new_tblspc_writes > :io_sum_local_after_writes; +-- Drop the table so we can drop the tablespace later. +DROP TABLE test_io_local; +RESET temp_buffers; +DROP TABLESPACE regress_io_stats_tblspc; + +-- Test that reuse of strategy buffers and reads of blocks into these reused +-- buffers while VACUUMing are tracked in pg_stat_io. + +-- Set wal_skip_threshold smaller than the expected size of +-- test_io_vac_strategy so that, even if wal_level is minimal, VACUUM FULL will +-- fsync the newly rewritten test_io_vac_strategy instead of writing it to WAL. +-- Writing it to WAL will result in the newly written relation pages being in +-- shared buffers -- preventing us from testing BAS_VACUUM BufferAccessStrategy +-- reads. 
+SET wal_skip_threshold = '1 kB'; +SELECT sum(reuses) AS reuses, sum(reads) AS reads + FROM pg_stat_io WHERE io_context = 'vacuum' \gset io_sum_vac_strategy_before_ +CREATE TABLE test_io_vac_strategy(a int, b int) WITH (autovacuum_enabled = 'false'); +INSERT INTO test_io_vac_strategy SELECT i, i from generate_series(1, 8000)i; +-- Ensure that the next VACUUM will need to perform IO by rewriting the table +-- first with VACUUM (FULL). +VACUUM (FULL) test_io_vac_strategy; +VACUUM (PARALLEL 0) test_io_vac_strategy; +SELECT pg_stat_force_next_flush(); +SELECT sum(reuses) AS reuses, sum(reads) AS reads + FROM pg_stat_io WHERE io_context = 'vacuum' \gset io_sum_vac_strategy_after_ +SELECT :io_sum_vac_strategy_after_reads > :io_sum_vac_strategy_before_reads, + :io_sum_vac_strategy_after_reuses > :io_sum_vac_strategy_before_reuses; +RESET wal_skip_threshold; + +-- Test that extends done by a CTAS, which uses a BAS_BULKWRITE +-- BufferAccessStrategy, are tracked in pg_stat_io. +SELECT sum(extends) AS io_sum_bulkwrite_strategy_extends_before + FROM pg_stat_io WHERE io_context = 'bulkwrite' \gset +CREATE TABLE test_io_bulkwrite_strategy AS SELECT i FROM generate_series(1,100)i; +SELECT pg_stat_force_next_flush(); +SELECT sum(extends) AS io_sum_bulkwrite_strategy_extends_after + FROM pg_stat_io WHERE io_context = 'bulkwrite' \gset +SELECT :io_sum_bulkwrite_strategy_extends_after > :io_sum_bulkwrite_strategy_extends_before; + +-- Test IO stats reset +SELECT pg_stat_have_stats('io', 0, 0); +SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_pre_reset + FROM pg_stat_io \gset +SELECT pg_stat_reset_shared('io'); +SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_post_reset + FROM pg_stat_io \gset +SELECT :io_stats_post_reset < :io_stats_pre_reset; + -- End of Stats Test diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 
1be6e07980..a399e0a5e4 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3377,6 +3377,7 @@ intset_internal_node intset_leaf_node intset_node intvKEY +io_stat_col itemIdCompact itemIdCompactData iterator -- 2.34.1