From 4abac6442061744b791031a25a31cd45563cc111 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Tue, 21 Jan 2025 11:44:35 +1300 Subject: [PATCH v4 2/3] Increase the maximum I/O combine size to 1MB. The default value of 128kB is not changed, but the upper limit is changed from 32 blocks to 128 blocks (1MB with 8kB blocks), assuming the operating system's IOV_MAX doesn't limit us to a smaller size. This is around where some other RDBMSes seem to cap their buffer pool I/O size, and it seems like a good idea to allow experiments with that. The concrete change is to our definition of PG_IOV_MAX, which provides the maximum limit for io_combine_limit and io_max_combine_limit. It also affects a couple of other places that work with arrays of struct iovec or smaller objects on the stack, so we still don't want to use the system IOV_MAX directly without a clamp: it is not under our control and likely to be 1024. 128 seems acceptable for all our current use cases. The last Unix on our target list known to have a low IOV_MAX was Solaris before 11.4 SRU72 (it was 16, the minimum requirement for POSIX conformance, but is now 1024, matching all other systems I looked at). For Windows, we can't use real scatter/gather yet (though it's possible, in later work), so we continue to define our own IOV_MAX value of 16 and emulate preadv()/pwritev() with loops there. Someone would need to research the trade-off. This change also makes it possible for read_stream.c's internal cap of INT16_MAX to be hit, so adjust comments about that. With *_io_concurrency and io_combine_limit set to their maximum, it would want to be able to pin 128K buffers at once (= 1GB of data), but the choice of data type limits streams to 32K buffers. That could be revisited in future, but you'll probably hit other limits long before that one in your quest to run 1,000 concurrent I/Os of size 1MB. 
Suggested-by: Tomas Vondra Discussion: https://postgr.es/m/CA%2BhUKG%2B2T9p-%2BzM6Eeou-RAJjTML6eit1qn26f9twznX59qtCA%40mail.gmail.com --- doc/src/sgml/config.sgml | 4 ++++ src/backend/storage/aio/read_stream.c | 7 ++++--- src/backend/utils/misc/postgresql.conf.sample | 4 ++-- src/include/port/pg_iovec.h | 8 ++++++-- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index d1080dac97f..93eea7f96d2 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -2638,6 +2638,8 @@ include_dir 'conf.d' This parameter can only be set in the postgresql.conf file or on the server command line. + The maximum possible size depends on the operating system and block + size, but is typically 1MB on Unix and 128kB on Windows. The default is 128kB. @@ -2655,6 +2657,8 @@ include_dir 'conf.d' higher than the io_max_combine_limit parameter, the smaller value will silently be used instead, so both may need to be raised to increase the I/O size. + The maximum possible size depends on the operating system and block + size, but is typically 1MB on Unix and 128kB on Windows. The default is 128kB. diff --git a/src/backend/storage/aio/read_stream.c b/src/backend/storage/aio/read_stream.c index d65fa07b44c..45bdf819d57 100644 --- a/src/backend/storage/aio/read_stream.c +++ b/src/backend/storage/aio/read_stream.c @@ -515,9 +515,10 @@ read_stream_begin_impl(int flags, * finishes we don't want to have to wait for its buffers to be consumed * before starting a new one. * - * Be careful not to allow int16 to overflow (even though that's not - * possible with the current GUC range limits), allowing also for the - * spare entry and the overflow space. + * Be careful not to allow int16 to overflow. That is possible with the + * current GUC range limits, so this is an artificial limit of ~32k + * buffers and we'd need to adjust the types to exceed that. We also have + * to allow for the spare entry and the overflow space. 
*/ max_pinned_buffers = (max_ios + 1) * io_combine_limit; max_pinned_buffers = Min(max_pinned_buffers, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index bd9a3507135..e43d803b278 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -200,9 +200,9 @@ #backend_flush_after = 0 # measured in pages, 0 disables #effective_io_concurrency = 16 # 1-1000; 0 disables prefetching #maintenance_io_concurrency = 10 # 1-1000; 0 disables prefetching -#io_max_combine_limit = 128kB # usually 1-32 blocks (depends on OS) +#io_max_combine_limit = 128kB # usually 1-128 blocks (depends on OS) # (change requires restart) -#io_combine_limit = 128kB # usually 1-32 blocks (depends on OS) +#io_combine_limit = 128kB # usually 1-128 blocks (depends on OS) #io_method = sync # sync (change requires restart) #io_max_concurrency = -1 # Max number of IOs that one process diff --git a/src/include/port/pg_iovec.h b/src/include/port/pg_iovec.h index d9891d3805d..df40c7208be 100644 --- a/src/include/port/pg_iovec.h +++ b/src/include/port/pg_iovec.h @@ -33,8 +33,12 @@ struct iovec #endif -/* Define a reasonable maximum that is safe to use on the stack. */ -#define PG_IOV_MAX Min(IOV_MAX, 32) +/* + * Define a reasonable maximum that is safe to use on the stack in arrays of + * struct iovec and other small types. The operating system could limit us to + * a number as low as 16, but most systems have 1024. + */ +#define PG_IOV_MAX Min(IOV_MAX, 128) /* * Like preadv(), but with a prefix to remind us of a side-effect: on Windows -- 2.39.5