From f6b18a3ef41f29d16b677e3e3df283554aa33fda Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy Date: Sun, 7 Aug 2022 05:05:18 +0000 Subject: [PATCH v2] Use pg_pwritev_with_retry() instead of write() in walmethods.c Use pg_pwritev_with_retry() while prepadding a WAL segment instead of write() in walmethods.c dir_open_for_write() to avoid partial writes. As the pg_pwritev_with_retry() function uses pwritev, we can avoid explicit lseek() to seek to the beginning of the WAL segment. These changes are inline with how core postgres initializes the WAL segment in XLogFileInitInternal(). --- src/backend/access/transam/xlog.c | 35 ++++++------------------- src/bin/pg_basebackup/walmethods.c | 19 +++----------- src/common/file_utils.c | 42 ++++++++++++++++++++++++++++++ src/include/common/file_utils.h | 3 +++ 4 files changed, 57 insertions(+), 42 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 34f0150d1e..3ed344008d 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -2916,7 +2916,6 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path) { char tmppath[MAXPGPATH]; - PGAlignedXLogBlock zbuffer; XLogSegNo installed_segno; XLogSegNo max_segno; int fd; @@ -2960,14 +2959,11 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, (errcode_for_file_access(), errmsg("could not create file \"%s\": %m", tmppath))); - memset(zbuffer.data, 0, XLOG_BLCKSZ); - pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE); save_errno = 0; if (wal_init_zero) { - struct iovec iov[PG_IOV_MAX]; - int blocks; + ssize_t rc; /* * Zero-fill the file. With this setting, we do this the hard way to @@ -2978,32 +2974,17 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, * indirect blocks are down on disk. Therefore, fdatasync(2) or * O_DSYNC will be sufficient to sync future writes to the log file. */ + rc = pg_pwritev_with_retry_and_write_zeros(fd, wal_segment_size); - /* Prepare to write out a lot of copies of our zero buffer at once. */ - for (int i = 0; i < lengthof(iov); ++i) - { - iov[i].iov_base = zbuffer.data; - iov[i].iov_len = XLOG_BLCKSZ; - } - - /* Loop, writing as many blocks as we can for each system call. */ - blocks = wal_segment_size / XLOG_BLCKSZ; - for (int i = 0; i < blocks;) - { - int iovcnt = Min(blocks - i, lengthof(iov)); - off_t offset = i * XLOG_BLCKSZ; - - if (pg_pwritev_with_retry(fd, iov, iovcnt, offset) < 0) - { - save_errno = errno; - break; - } - - i += iovcnt; - } + if (rc < 0) + save_errno = errno; } else { + PGAlignedXLogBlock zbuffer; + + memset(zbuffer.data, 0, XLOG_BLCKSZ); + /* * Otherwise, seeking to the end and writing a solitary byte is * enough. diff --git a/src/bin/pg_basebackup/walmethods.c b/src/bin/pg_basebackup/walmethods.c index e90aa0ba37..beb8071ecc 100644 --- a/src/bin/pg_basebackup/walmethods.c +++ b/src/bin/pg_basebackup/walmethods.c @@ -209,23 +209,12 @@ dir_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_ /* Do pre-padding on non-compressed files */ if (pad_to_size && dir_data->compression_algorithm == PG_COMPRESSION_NONE) { - PGAlignedXLogBlock zerobuf; - int bytes; + ssize_t rc; - memset(zerobuf.data, 0, XLOG_BLCKSZ); - for (bytes = 0; bytes < pad_to_size; bytes += XLOG_BLCKSZ) - { - errno = 0; - if (write(fd, zerobuf.data, XLOG_BLCKSZ) != XLOG_BLCKSZ) - { - /* If write didn't set errno, assume problem is no disk space */ - dir_data->lasterrno = errno ? errno : ENOSPC; - close(fd); - return NULL; - } - } + errno = 0; + rc = pg_pwritev_with_retry_and_write_zeros(fd, pad_to_size); - if (lseek(fd, 0, SEEK_SET) != 0) + if (rc < 0) { dir_data->lasterrno = errno; close(fd); diff --git a/src/common/file_utils.c b/src/common/file_utils.c index 4af216e56c..daaa426e3d 100644 --- a/src/common/file_utils.c +++ b/src/common/file_utils.c @@ -525,3 +525,45 @@ pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset) return sum; } + +/* + * A convenience wrapper for pg_pwritev_with_retry() that zero-fills the given + * file of size total_sz in batches of size XLOG_BLCKSZ. + */ +ssize_t +pg_pwritev_with_retry_and_write_zeros(int fd, size_t total_sz) +{ + PGAlignedXLogBlock zbuffer; + struct iovec iov[PG_IOV_MAX]; + int blocks; + size_t block_sz; + ssize_t rc; + + block_sz = sizeof(zbuffer.data); + + memset(zbuffer.data, 0, block_sz); + + /* Prepare to write out a lot of copies of our zero buffer at once. */ + for (int i = 0; i < lengthof(iov); ++i) + { + iov[i].iov_base = zbuffer.data; + iov[i].iov_len = block_sz; + } + + /* Loop, writing as many blocks as we can for each system call. */ + blocks = total_sz / block_sz; + for (int i = 0; i < blocks;) + { + int iovcnt = Min(blocks - i, lengthof(iov)); + off_t offset = i * block_sz; + + rc = pg_pwritev_with_retry(fd, iov, iovcnt, offset); + + if ( rc < 0) + break; + + i += iovcnt; + } + + return rc; +} diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h index 2c5dbcb0b1..a5af97bb85 100644 --- a/src/include/common/file_utils.h +++ b/src/include/common/file_utils.h @@ -44,4 +44,7 @@ extern ssize_t pg_pwritev_with_retry(int fd, int iovcnt, off_t offset); +extern ssize_t pg_pwritev_with_retry_and_write_zeros(int fd, + size_t total_sz); + #endif /* FILE_UTILS_H */ -- 2.34.1