From 9e3b7f9eaa40ada876442ff2daf3b6ef86b13bc0 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Thu, 16 Sep 2021 23:14:03 +0000 Subject: [PATCH v6 1/1] Do fewer directory scans of archive_status. Presently, the archive_status directory is scanned for each file to archive. When there are many archive status files (e.g., archive_command has been failing for a long time), these directory scans can take much longer, which significantly slows the rate of archival. With this change, the archiver will attempt to skip the directory scan by simply incrementing the segment number of the last archived file. This should greatly improve the rate of archival when there are many files in archive_status. To ensure timeline history files are archived as quickly as possible, XLogArchiveNotify() forces the archiver to do a new directory scan as soon as the .ready file for one is created. To ensure that .ready files that are created out-of-order are eventually picked up, the archiver forces a directory scan every few minutes. --- src/backend/access/transam/xlogarchive.c | 11 ++ src/backend/postmaster/pgarch.c | 171 ++++++++++++++++++++++++++----- src/include/postmaster/pgarch.h | 1 + 3 files changed, 158 insertions(+), 25 deletions(-) diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c index 26b023e754..caabda80d3 100644 --- a/src/backend/access/transam/xlogarchive.c +++ b/src/backend/access/transam/xlogarchive.c @@ -489,6 +489,17 @@ XLogArchiveNotify(const char *xlog) return; } + /* + * Force a directory scan if we are archiving anything but a regular + * WAL file. The archiver tries to choose files to archive by + * incrementing the segment number of the last-archived file, which + * means that other file types (e.g., timeline history files) could be + * left behind indefinitely. By forcing a directory scan for these + * other file types, we can ensure the archiver will pick them up. + */ + if (!IsXLogFileName(xlog)) + PgArchForceDirScan(); + /* Notify archiver that it's got something to do */ if (IsUnderPostmaster) PgArchWakeup(); diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c index 74a7d7c4d0..67af58b335 100644 --- a/src/backend/postmaster/pgarch.c +++ b/src/backend/postmaster/pgarch.c @@ -47,6 +47,7 @@ #include "storage/proc.h" #include "storage/procsignal.h" #include "storage/shmem.h" +#include "storage/spin.h" #include "utils/guc.h" #include "utils/ps_status.h" @@ -76,6 +77,14 @@ typedef struct PgArchData { int pgprocno; /* pgprocno of archiver process */ + + /* + * Forces a directory scan in pgarch_readyXlog(). Protected by + * arch_lck. + */ + bool force_dir_scan; + + slock_t arch_lck; } PgArchData; @@ -85,6 +94,9 @@ typedef struct PgArchData */ static time_t last_sigterm_time = 0; static PgArchData *PgArch = NULL; +static XLogSegNo last_archived_segno = 0; +static TimeLineID last_archived_tli = 0; +static TimestampTz last_dir_scan = 0; /* * Flags set by interrupt handlers for later service in the main loop. @@ -103,6 +115,7 @@ static bool pgarch_readyXlog(char *xlog); static void pgarch_archiveDone(char *xlog); static void pgarch_die(int code, Datum arg); static void HandlePgArchInterrupts(void); +static bool higher_arch_priority(const char *a, const char *b); /* Report shared memory space needed by PgArchShmemInit */ Size @@ -129,6 +142,7 @@ PgArchShmemInit(void) /* First time through, so initialize */ MemSet(PgArch, 0, PgArchShmemSize()); PgArch->pgprocno = INVALID_PGPROCNO; + SpinLockInit(&PgArch->arch_lck); } } @@ -325,6 +339,19 @@ pgarch_ArchiverCopyLoop(void) { char xlog[MAX_XFN_CHARS + 1]; + /* + * Force a directory scan to find the next file to archive in the first + * call to pgarch_readyXlog(). This is important to do because the + * archiver may just be starting up or may have failed previously (and + * already advanced our next anticipated segment number to archive). + * + * Forcing a directory scan at the beginning of every call to + * pgarch_ArchiverCopyLoop() might be unnecessary in some cases and + * could theoretically result in slower rates of archival, but it's not + * clear if there is any significant impact in practice. + */ + PgArchForceDirScan(); + /* * loop through all xlogs with archive_status of .ready and archive * them...mostly we expect this to be a single file, though it is possible @@ -600,26 +627,72 @@ pgarch_archiveXlog(char *xlog) static bool pgarch_readyXlog(char *xlog) { - /* - * open xlog status directory and read through list of xlogs that have the - * .ready suffix, looking for earliest file. It is possible to optimise - * this code, though only a single file is expected on the vast majority - * of calls, so.... - */ char XLogArchiveStatusDir[MAXPGPATH]; + char basename[MAX_XFN_CHARS + 1]; DIR *rldir; struct dirent *rlde; bool found = false; - bool historyFound = false; + bool force_dir_scan; + TimestampTz now; + + /* + * We first try to choose a new file to archive without a directory + * scan. If possible, we simply increment the segment number of the + * last archived WAL file and generate the next anticipated WAL file + * name. + * + * However, in some cases, we must perform a directory scan to find the + * next file to archive (e.g., the file is not a regular WAL file). + * This is necessary because incrementing the segment number won't help + * us find other files besides regular WAL files, and .ready files may + * sometimes be created out of order. To handle other file types, + * XLogArchiveNotify() will request a directory scan when creating the + * .ready file. To handle out of order .ready files, we force a + * directory scan every 3 minutes. + */ + SpinLockAcquire(&PgArch->arch_lck); + force_dir_scan = PgArch->force_dir_scan; + PgArch->force_dir_scan = false; + SpinLockRelease(&PgArch->arch_lck); + + now = GetCurrentTimestamp(); + if (force_dir_scan || + TimestampDifferenceExceeds(last_dir_scan, now, 180000)) + { + last_dir_scan = now; + force_dir_scan = true; + } + + if (!force_dir_scan) + { + struct stat st; + char readyfile[MAXPGPATH]; + + last_archived_segno++; + XLogFileName(basename, last_archived_tli, last_archived_segno, wal_segment_size); + StatusFilePath(readyfile, basename, ".ready"); + if (stat(readyfile, &st) == 0) + { + strcpy(xlog, basename); + return true; + } + else if (errno != ENOENT) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", readyfile))); + } + + /* + * Open the archive status directory and read through the list of files + * with the .ready suffix, looking for the earliest file. + */ snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGDIR "/archive_status"); rldir = AllocateDir(XLogArchiveStatusDir); while ((rlde = ReadDir(rldir, XLogArchiveStatusDir)) != NULL) { int basenamelen = (int) strlen(rlde->d_name) - 6; - char basename[MAX_XFN_CHARS + 1]; - bool ishistory; /* Ignore entries with unexpected number of characters */ if (basenamelen < MIN_XFN_CHARS || @@ -638,34 +711,82 @@ pgarch_readyXlog(char *xlog) memcpy(basename, rlde->d_name, basenamelen); basename[basenamelen] = '\0'; - /* Is this a history file? */ - ishistory = IsTLHistoryFileName(basename); - /* - * Consume the file to archive. History files have the highest - * priority. If this is the first file or the first history file - * ever, copy it. In the presence of a history file already chosen as - * target, ignore all other files except history files which have been - * generated for an older timeline than what is already chosen as - * target to archive. + * If this is the first file or this file has a higher archive + * priority than the current one stored, copy it. */ - if (!found || (ishistory && !historyFound)) + if (!found || higher_arch_priority(basename, xlog)) { strcpy(xlog, basename); found = true; - historyFound = ishistory; - } - else if (ishistory || !historyFound) - { - if (strcmp(basename, xlog) < 0) - strcpy(xlog, basename); } } FreeDir(rldir); + /* + * If we found a regular WAL file, update our in-memory state. + * Otherwise, we have to do a directory scan the next time around. + */ + if (found && IsXLogFileName(xlog)) + XLogFromFileName(xlog, &last_archived_tli, &last_archived_segno, wal_segment_size); + else + PgArchForceDirScan(); + return found; } +/* + * higher_arch_priority + * + * Compares the archival priority of the two file names. If "a" has a + * higher priority than "b", true is returned. Otherwise, false is + * returned. + */ +static bool +higher_arch_priority(const char *a, const char *b) +{ + bool a_ishistory = IsTLHistoryFileName(a); + bool b_ishistory = IsTLHistoryFileName(b); + bool a_isbackuphistory = IsBackupHistoryFileName(a); + bool b_isbackuphistory = IsBackupHistoryFileName(b); + + /* + * Timeline history files have a higher priority than everything else. + * Backup history files are given the second highest priority so that + * the archiver picks them up when a directory scan is forced. We do + * not give .partial files a higher priority, as it is expected that + * the archiver process will eventually fall back to a directory scan + * due to the timeline switch. + */ + if (a_ishistory || b_ishistory) + { + if (a_ishistory != b_ishistory) + return a_ishistory; + } + else if (a_isbackuphistory || b_isbackuphistory) + { + if (a_isbackuphistory != b_isbackuphistory) + return a_isbackuphistory; + } + + return (strcmp(a, b) < 0); +} + +/* + * PgArchForceDirScan + * + * When called, the next call to pgarch_readyXlog() will perform a + * directory scan. This is useful for ensuring that important files such + * as timeline history files are archived as quickly as possible. + */ +void +PgArchForceDirScan(void) +{ + SpinLockAcquire(&PgArch->arch_lck); + PgArch->force_dir_scan = true; + SpinLockRelease(&PgArch->arch_lck); +} + /* * pgarch_archiveDone * diff --git a/src/include/postmaster/pgarch.h b/src/include/postmaster/pgarch.h index 1e47a143e1..732615be57 100644 --- a/src/include/postmaster/pgarch.h +++ b/src/include/postmaster/pgarch.h @@ -31,5 +31,6 @@ extern void PgArchShmemInit(void); extern bool PgArchCanRestart(void); extern void PgArchiverMain(void) pg_attribute_noreturn(); extern void PgArchWakeup(void); +extern void PgArchForceDirScan(void); #endif /* _PGARCH_H */ -- 2.16.6