From 4b373d4aae653535d181bea0f896546edbb70212 Mon Sep 17 00:00:00 2001 From: Jerry Jelinek Date: Tue, 5 Mar 2019 22:17:24 +0000 Subject: [PATCH] cow filesystem --- doc/src/sgml/config.sgml | 19 +++++ src/backend/access/transam/xlog.c | 101 +++++++++++++++++--------- src/backend/utils/misc/guc.c | 13 ++++ src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/access/xlog.h | 1 + 5 files changed, 101 insertions(+), 34 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 6d42b7afe7..6cf0c6afe0 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -3573,6 +3573,25 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows + + wal_cow_filesystem (boolean) + + wal_cow_filesystem configuration parameter + + + + + This parameter should only be set to on when the WAL + resides on a Copy-On-Write (COW) + filesystem. + Enabling this option adjusts some behavior to take advantage of the + filesystem characteristics (for example, recycling WAL files and + zero-filling new WAL files are disabled). + + + + + wal_sender_timeout (integer) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index ecd12fc53a..6e5474fa97 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -94,6 +94,7 @@ bool wal_log_hints = false; bool wal_compression = false; char *wal_consistency_checking_string = NULL; bool *wal_consistency_checking = NULL; +bool wal_cow_filesystem = false; bool log_checkpoints = false; int sync_method = DEFAULT_SYNC_METHOD; int wal_level = WAL_LEVEL_MINIMAL; @@ -3216,6 +3217,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) XLogSegNo max_segno; int fd; int nbytes; + bool fail; XLogFilePath(path, ThisTimeLineID, logsegno, wal_segment_size); @@ -3255,39 +3257,62 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) (errcode_for_file_access(), errmsg("could not create file \"%s\": %m", tmppath))); - /* - * Zero-fill the file. We have to do this the hard way to ensure that all - * the file space has really been allocated --- on platforms that allow - * "holes" in files, just seeking to the end doesn't allocate intermediate - * space. This way, we know that we have all the space and (after the - * fsync below) that all the indirect blocks are down on disk. Therefore, - * fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the - * log file. - */ memset(zbuffer.data, 0, XLOG_BLCKSZ); - for (nbytes = 0; nbytes < wal_segment_size; nbytes += XLOG_BLCKSZ) - { + + if (!wal_cow_filesystem) { + /* + * In non-CoW filesystems, zero-fill the file. We have to do this the + * hard way to ensure that all the file space has really been + * allocated --- on platforms that allow "holes" in files, just seeking + * to the end doesn't allocate intermediate space. This way, we know + * that we have all the space and (after the fsync below) that all the + * indirect blocks are down on disk. Therefore, fdatasync(2) or + * O_DSYNC will be sufficient to sync future writes to the log file. + */ + fail = false; /* keep compiler quiet */ + for (nbytes = 0; nbytes < wal_segment_size; nbytes += XLOG_BLCKSZ) + { + errno = 0; + pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE); + if ((int) write(fd, zbuffer.data, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ) + fail = true; + pgstat_report_wait_end(); + if (fail) + break; + } + } + else + { + /* + * In CoW filesystems, seeking to the end and writing a solitary byte + * is enough. + */ errno = 0; + fail = false; /* keep compiler quiet */ pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE); - if ((int) write(fd, zbuffer.data, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ) - { - int save_errno = errno; + if (lseek(fd, wal_segment_size - 1, SEEK_SET) == -1 || + (int) write(fd, zbuffer.data, 1) != (int) 1) + fail = true; + pgstat_report_wait_end(); + } - /* - * If we fail to make the file, delete it to release disk space - */ - unlink(tmppath); + if (fail) + { + int save_errno = errno; - close(fd); + /* + * If we fail to make the file, delete it to release disk space + */ + unlink(tmppath); - /* if write didn't set errno, assume problem is no disk space */ - errno = save_errno ? save_errno : ENOSPC; + close(fd); - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not write to file \"%s\": %m", tmppath))); - } - pgstat_report_wait_end(); + /* if write didn't set errno, assume problem is no disk space */ + errno = save_errno ? save_errno : ENOSPC; + + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to file \"%s\": %m", tmppath))); } pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC); @@ -4053,14 +4078,18 @@ RemoveXlogFile(const char *segname, XLogRecPtr RedoRecPtr, XLogRecPtr endptr) XLogSegNo endlogSegNo; XLogSegNo recycleSegNo; - /* - * Initialize info about where to try to recycle to. - */ - XLByteToSeg(endptr, endlogSegNo, wal_segment_size); - if (RedoRecPtr == InvalidXLogRecPtr) - recycleSegNo = endlogSegNo + 10; + if (!wal_cow_filesystem) { + /* + * Initialize info about where to try to recycle to. + */ + XLByteToSeg(endptr, endlogSegNo, wal_segment_size); + if (RedoRecPtr == InvalidXLogRecPtr) + recycleSegNo = endlogSegNo + 10; + else + recycleSegNo = XLOGfileslop(RedoRecPtr); + } else - recycleSegNo = XLOGfileslop(RedoRecPtr); + recycleSegNo = (XLogSegNo) 0; /* keep compiler quiet */ snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname); @@ -4068,8 +4097,12 @@ RemoveXlogFile(const char *segname, XLogRecPtr RedoRecPtr, XLogRecPtr endptr) * Before deleting the file, see if it can be recycled as a future log * segment. Only recycle normal files, pg_standby for example can create * symbolic links pointing to a separate archive directory. + * + * Skip recycling on COW filesystems, though. It's better to create + * new files each time. */ - if (endlogSegNo <= recycleSegNo && + if (!wal_cow_filesystem && + endlogSegNo <= recycleSegNo && lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) && InstallXLogFileSegment(&endlogSegNo, path, true, recycleSegNo, true)) diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 156d147c85..4c64ddd4b9 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1177,6 +1177,19 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"wal_cow_filesystem", PGC_SUSET, WAL_SETTINGS, + gettext_noop("WAL is stored on a Copy-On-Write filesystem."), + gettext_noop("This option adjusts behavior to take advantage of " + "filesystem characteristics specific to CoW filesystems, " + "improving performance. " + "It should be enabled on ZFS and other similar filesystems.") + }, + &wal_cow_filesystem, + false, + NULL, NULL, NULL + }, + { {"log_checkpoints", PGC_SIGHUP, LOGGING_WHAT, gettext_noop("Logs each checkpoint."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index bd6ea65d0c..6607ec8135 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -206,6 +206,7 @@ #wal_compression = off # enable compression of full-page writes #wal_log_hints = off # also do full page writes of non-critical updates # (change requires restart) +#wal_cow_filesystem = off # is pg_wal on a Copy-on-Write filesystem? #wal_buffers = -1 # min 32kB, -1 sets based on shared_buffers # (change requires restart) #wal_writer_delay = 200ms # 1-10000 milliseconds diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index f90a6a9139..fa9d69affe 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -116,6 +116,7 @@ extern bool EnableHotStandby; extern bool fullPageWrites; extern bool wal_log_hints; extern bool wal_compression; +extern bool wal_cow_filesystem; extern bool *wal_consistency_checking; extern char *wal_consistency_checking_string; extern bool log_checkpoints; -- 2.15.1