diff --git a/src/bin/pg_basebackup/receivelog.c b/src/bin/pg_basebackup/receivelog.c index e1bd4ad..d8145e9 100644 --- a/src/bin/pg_basebackup/receivelog.c +++ b/src/bin/pg_basebackup/receivelog.c @@ -40,6 +40,9 @@ static PGresult *HandleCopyStream(PGconn *conn, XLogRecPtr startpos, static bool ReadEndOfStreamingResult(PGresult *res, XLogRecPtr *startpos, uint32 *timeline); +static int fsync_parent_path(const char *fname); +static int fsync_fname_ext(const char *fname, bool isdir); +static int durable_rename(const char *oldfile, const char *newfile); static bool mark_file_as_archived(const char *basedir, const char *fname) @@ -58,17 +61,13 @@ mark_file_as_archived(const char *basedir, const char *fname) return false; } - if (fsync(fd) != 0) - { - fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"), - progname, tmppath, strerror(errno)); - - close(fd); + close(fd); + if (fsync_fname_ext(tmppath, false) != 0) return false; - } - close(fd); + if (fsync_parent_path(tmppath) != 0) + return false; return true; } @@ -107,6 +106,10 @@ open_walfile(XLogRecPtr startpoint, uint32 timeline, char *basedir, /* * Verify that the file is either empty (just created), or a complete * XLogSegSize segment. Anything in between indicates a corrupt file. + * + * XXX: This means that we might not restart after a crash that leaves a + * partially padded file, i.e. one before the fsync below. We probably + * should create the file in a temporary path like the backend does... */ if (fstat(f, &statbuf) != 0) { @@ -120,6 +123,16 @@ open_walfile(XLogRecPtr startpoint, uint32 timeline, char *basedir, { /* File is open and ready to use */ walfile = f; + + /* + * fsync, in case of a previous crash between padding and fsyncing the + * file. NOTE(review): failure leaves f open, walfile set -- confirm. 
+ */ + if (fsync_fname_ext(fn, false) != 0) + return false; + if (fsync_parent_path(fn) != 0) + return false; + return true; } if (statbuf.st_size != 0) @@ -148,6 +161,15 @@ open_walfile(XLogRecPtr startpoint, uint32 timeline, char *basedir, } free(zerobuf); + /* + * fsync WAL file and containing directory, to ensure the file is + * persistently created and zeroed. NOTE(review): f leaks on failure? + */ + if (fsync_fname_ext(fn, false) != 0) + return false; + if (fsync_parent_path(fn) != 0) + return false; + if (lseek(f, SEEK_SET, 0) != 0) { fprintf(stderr, @@ -208,10 +230,9 @@ close_walfile(char *basedir, char *partial_suffix, bool mark_done) snprintf(oldfn, sizeof(oldfn), "%s/%s%s", basedir, current_walfile_name, partial_suffix); snprintf(newfn, sizeof(newfn), "%s/%s", basedir, current_walfile_name); - if (rename(oldfn, newfn) != 0) + if (durable_rename(oldfn, newfn) != 0) { - fprintf(stderr, _("%s: could not rename file \"%s\": %s\n"), - progname, current_walfile_name, strerror(errno)); + /* durable_rename already reported the error */ return false; } } @@ -386,14 +407,6 @@ writeTimeLineHistoryFile(char *basedir, TimeLineID tli, char *filename, return false; } - if (fsync(fd) != 0) - { - close(fd); - fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"), - progname, tmppath, strerror(errno)); - return false; - } - if (close(fd) != 0) { fprintf(stderr, _("%s: could not close file \"%s\": %s\n"), @@ -404,10 +417,9 @@ writeTimeLineHistoryFile(char *basedir, TimeLineID tli, char *filename, /* * Now move the completed history file into place with its final name. 
*/ - if (rename(tmppath, path) < 0) + if (durable_rename(tmppath, path) < 0) { - fprintf(stderr, _("%s: could not rename file \"%s\" to \"%s\": %s\n"), - progname, tmppath, path, strerror(errno)); + /* durable_rename already reported the error */ return false; } @@ -833,6 +845,132 @@ ReadEndOfStreamingResult(PGresult *res, XLogRecPtr *startpos, uint32 *timeline) } /* + * fsync_fname_ext -- Try to fsync a file or directory + * + * Returns 0 on success or on a tolerated directory error, -1 otherwise. + * + * XXX: This is a near-duplicate of initdb.c's fsync_fname_ext(); they should + * be unified into a common place. + */ +static int +fsync_fname_ext(const char *fname, bool isdir) +{ + int fd; + int flags; + int returncode; + + /* + * Some OSes require directories to be opened read-only whereas other + * systems don't allow us to fsync files opened read-only; so we need both + * cases here. Using O_RDWR will cause us to fail to fsync files that are + * not writable by our userid, but we assume that's OK. + */ + flags = PG_BINARY; + if (!isdir) + flags |= O_RDWR; + else + flags |= O_RDONLY; + + /* + * Open the file. For directories, silently ignore EISDIR and EACCES (e.g. + * opening a directory is an unsupported operation under Windows); other + * errors are logged. + */ + fd = open(fname, flags); + if (fd < 0) + { + if (isdir && (errno == EISDIR || errno == EACCES)) + return 0; + fprintf(stderr, _("%s: could not open file \"%s\": %s\n"), + progname, fname, strerror(errno)); + return -1; + } + + returncode = fsync(fd); + + /* + * Some OSes don't allow us to fsync directories at all, so we can ignore + * EBADF for directories. Anything else needs to be reported. 
+ */ + if (returncode != 0 && !(isdir && errno == EBADF)) + { + fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"), + progname, fname, strerror(errno)); + close(fd); + return -1; + } + + close(fd); + return 0; +} + +/* + * fsync_parent_path -- fsync the parent path of a file or directory + * + * This is aimed at making file operations persistent on disk in case of + * an OS crash or power failure. Returns 0 on success, -1 otherwise. + */ +static int +fsync_parent_path(const char *fname) +{ + char parentpath[MAXPGPATH]; + + strlcpy(parentpath, fname, MAXPGPATH); + get_parent_directory(parentpath); + + /* + * get_parent_directory() returns an empty string if the input argument is + * just a file name (see comments in path.c), so handle that as being the + * current directory. + */ + if (strlen(parentpath) == 0) + strlcpy(parentpath, ".", MAXPGPATH); + + if (fsync_fname_ext(parentpath, true) != 0) + return -1; + + return 0; +} + +/* + * durable_rename -- rename(2) wrapper, issuing fsyncs required for durability + * + * Similar to the backend version, except that this version does not fsync + * the target file before the rename, as it's unlikely to be helpful for + * current and prospective users. Returns 0 on success, -1 otherwise. + */ +static int +durable_rename(const char *oldfile, const char *newfile) +{ + /* + * First fsync the old path, to ensure that it is properly persistent on + * disk. + */ + if (fsync_fname_ext(oldfile, false) != 0) + return -1; + + /* Now perform the rename itself */ + if (rename(oldfile, newfile) != 0) + { + fprintf(stderr, _("%s: could not rename file \"%s\" to \"%s\": %s\n"), + progname, oldfile, newfile, strerror(errno)); + return -1; + } + + /* + * To guarantee renaming the file is persistent, fsync the file with its + * new name, and its containing directory. + */ + if (fsync_fname_ext(newfile, false) != 0) + return -1; + + if (fsync_parent_path(newfile) != 0) + return -1; + + return 0; +} + +/* + * The main loop of ReceiveXLogStream. 
Handles the COPY stream after * initiating streaming with the START_STREAMING command. *