From 777b5265c296b09dd842e5b93f617bafd0f00a93 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Mon, 13 Feb 2023 17:11:29 -0800 Subject: [PATCH v4 01/15] Revise pg_pwrite_zeros() - add offset parameter - avoid memset() of zerobuf on every call - don't initialize the whole IOV array unnecessarily - don't handle the smaller trailing write in a separate write call --- src/include/common/file_utils.h | 2 +- src/common/file_utils.c | 62 ++++++++---------------------- src/backend/access/transam/xlog.c | 2 +- src/bin/pg_basebackup/walmethods.c | 2 +- 4 files changed, 20 insertions(+), 48 deletions(-) diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h index bda6d3a5413..b7efa1226d6 100644 --- a/src/include/common/file_utils.h +++ b/src/include/common/file_utils.h @@ -44,6 +44,6 @@ extern ssize_t pg_pwritev_with_retry(int fd, int iovcnt, off_t offset); -extern ssize_t pg_pwrite_zeros(int fd, size_t size); +extern ssize_t pg_pwrite_zeros(int fd, size_t size, off_t offset); #endif /* FILE_UTILS_H */ diff --git a/src/common/file_utils.c b/src/common/file_utils.c index 4dae534152f..93b5d42c5d1 100644 --- a/src/common/file_utils.c +++ b/src/common/file_utils.c @@ -537,62 +537,34 @@ pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset) * is returned with errno set. */ ssize_t -pg_pwrite_zeros(int fd, size_t size) +pg_pwrite_zeros(int fd, size_t size, off_t offset) { - PGAlignedBlock zbuffer; /* worth BLCKSZ */ - size_t zbuffer_sz; + static const PGAlignedBlock zbuffer = {0}; /* worth BLCKSZ */ + void *zerobuf_addr = unconstify(PGAlignedBlock *, &zbuffer)->data; struct iovec iov[PG_IOV_MAX]; - int blocks; - size_t remaining_size = 0; - int i; - ssize_t written; + size_t remaining_size = size; ssize_t total_written = 0; - zbuffer_sz = sizeof(zbuffer.data); - - /* Zero-fill the buffer. */ - memset(zbuffer.data, 0, zbuffer_sz); - - /* Prepare to write out a lot of copies of our zero buffer at once. */ - for (i = 0; i < lengthof(iov); ++i) - { - iov[i].iov_base = zbuffer.data; - iov[i].iov_len = zbuffer_sz; - } - /* Loop, writing as many blocks as we can for each system call. */ - blocks = size / zbuffer_sz; - remaining_size = size % zbuffer_sz; - for (i = 0; i < blocks;) + while (remaining_size > 0) { - int iovcnt = Min(blocks - i, lengthof(iov)); - off_t offset = i * zbuffer_sz; + int iovcnt = 0; + ssize_t written; + + for (; iovcnt < PG_IOV_MAX && remaining_size > 0; iovcnt++) + { + size_t this_iov_size = Min(remaining_size, BLCKSZ); + + iov[iovcnt].iov_base = zerobuf_addr; + iov[iovcnt].iov_len = this_iov_size; + remaining_size -= this_iov_size; + } written = pg_pwritev_with_retry(fd, iov, iovcnt, offset); - - if (written < 0) - return written; - - i += iovcnt; - total_written += written; - } - - /* Now, write the remaining size, if any, of the file with zeros. */ - if (remaining_size > 0) - { - /* We'll never write more than one block here */ - int iovcnt = 1; - - /* Jump on to the end of previously written blocks */ - off_t offset = i * zbuffer_sz; - - iov[0].iov_len = remaining_size; - - written = pg_pwritev_with_retry(fd, iov, iovcnt, offset); - if (written < 0) return written; + offset += written; total_written += written; } diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index f9f0f6db8d1..786b26054cf 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -2981,7 +2981,7 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, * indirect blocks are down on disk. Therefore, fdatasync(2) or * O_DSYNC will be sufficient to sync future writes to the log file. */ - rc = pg_pwrite_zeros(fd, wal_segment_size); + rc = pg_pwrite_zeros(fd, wal_segment_size, 0); if (rc < 0) save_errno = errno; diff --git a/src/bin/pg_basebackup/walmethods.c b/src/bin/pg_basebackup/walmethods.c index 54014e2b84d..6d14b988cb6 100644 --- a/src/bin/pg_basebackup/walmethods.c +++ b/src/bin/pg_basebackup/walmethods.c @@ -222,7 +222,7 @@ dir_open_for_write(WalWriteMethod *wwmethod, const char *pathname, { ssize_t rc; - rc = pg_pwrite_zeros(fd, pad_to_size); + rc = pg_pwrite_zeros(fd, pad_to_size, 0); if (rc < 0) { -- 2.38.0