From ab0e6954d38e70209c0fd3e4b4c33e99a09fb297 Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy Date: Sun, 17 Apr 2022 08:33:05 +0000 Subject: [PATCH v1] pg_walcleaner --- src/bin/Makefile | 1 + src/bin/pg_walcleaner/Makefile | 36 ++ src/bin/pg_walcleaner/nls.mk | 5 + src/bin/pg_walcleaner/pg_walcleaner.c | 766 ++++++++++++++++++++++++++ src/bin/pg_walcleaner/repl_slot.h | 137 +++++ src/bin/pg_walcleaner/t/001_basic.pl | 14 + 6 files changed, 959 insertions(+) create mode 100644 src/bin/pg_walcleaner/Makefile create mode 100644 src/bin/pg_walcleaner/nls.mk create mode 100644 src/bin/pg_walcleaner/pg_walcleaner.c create mode 100644 src/bin/pg_walcleaner/repl_slot.h create mode 100644 src/bin/pg_walcleaner/t/001_basic.pl diff --git a/src/bin/Makefile b/src/bin/Makefile index 7f9dde924e..045ff6a93d 100644 --- a/src/bin/Makefile +++ b/src/bin/Makefile @@ -29,6 +29,7 @@ SUBDIRS = \ pg_test_timing \ pg_upgrade \ pg_verifybackup \ + pg_walcleaner \ pg_waldump \ pgbench \ psql \ diff --git a/src/bin/pg_walcleaner/Makefile b/src/bin/pg_walcleaner/Makefile new file mode 100644 index 0000000000..dee4c8feed --- /dev/null +++ b/src/bin/pg_walcleaner/Makefile @@ -0,0 +1,36 @@ +# src/bin/pg_walcleaner/Makefile + +PGFILEDESC = "pg_walcleaner - deletes (optionally archives before deletion) unneeded PostgreSQL Write-Ahead Log files" +PGAPPICON = win32 + +subdir = src/bin/pg_walcleaner +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +OBJS = \ + $(WIN32RES) \ + pg_walcleaner.o + +all: pg_walcleaner + +pg_walcleaner: $(OBJS) | submake-libpgport + $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X) + +install: all installdirs + $(INSTALL_PROGRAM) pg_walcleaner$(X) '$(DESTDIR)$(bindir)/pg_walcleaner$(X)' + +installdirs: + $(MKDIR_P) '$(DESTDIR)$(bindir)' + +uninstall: + rm -f '$(DESTDIR)$(bindir)/pg_walcleaner$(X)' + +clean distclean maintainer-clean: + rm -f pg_walcleaner$(X) $(OBJS) + rm -rf tmp_check + +check: + $(prove_check) + +installcheck: + $(prove_installcheck) diff --git a/src/bin/pg_walcleaner/nls.mk b/src/bin/pg_walcleaner/nls.mk new file mode 100644 index 0000000000..51a73a102d --- /dev/null +++ b/src/bin/pg_walcleaner/nls.mk @@ -0,0 +1,5 @@ +# src/bin/pg_walcleaner/nls.mk +CATALOG_NAME = pg_walcleaner +GETTEXT_FILES = $(FRONTEND_COMMON_GETTEXT_FILES) pg_walcleaner.c +GETTEXT_TRIGGERS = $(FRONTEND_COMMON_GETTEXT_TRIGGERS) +GETTEXT_FLAGS = $(FRONTEND_COMMON_GETTEXT_FLAGS) diff --git a/src/bin/pg_walcleaner/pg_walcleaner.c b/src/bin/pg_walcleaner/pg_walcleaner.c new file mode 100644 index 0000000000..c6ace2e375 --- /dev/null +++ b/src/bin/pg_walcleaner/pg_walcleaner.c @@ -0,0 +1,766 @@ +/*------------------------------------------------------------------------- + * + * pg_walcleaner.c + * Tool for deleting (optionally archiving before deleting) unneeded + * PostgreSQL Write-Ahead Log (WAL) files to free up disk space in + * server down/crash because of "no space left on device" + * situations. + * + * Copyright (c) 2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/bin/pg_walcleaner/pg_walcleaner.c + * + *------------------------------------------------------------------------- + */ +#include "postgres_fe.h" + +#include +#include +#include + +#include "access/xlog_internal.h" +#include "common/controldata_utils.h" +#include "common/logging.h" +#include "common/string.h" +#include "getopt_long.h" +#include "pg_getopt.h" +#include "repl_slot.h" + +#define PG_WAL_DIR "pg_wal" +#define PG_REPLSLOT_DIR "pg_replslot" + +const char *progname; + +/* options and defaults */ +char *archive_command = NULL; +bool dryrun = false; +char *data_dir = NULL; +bool ignore_replication_slots = false; +bool verbose = false; + +/* + * Oldest WAL file to retain. All other files before this can safely be + * removed. + */ +char cutoff_wal_file[MAXFNAMELEN]; +ControlFileData *ControlFile = NULL; +int WalSegSz; + +static void usage(void); +static DIR *get_destination_dir(char *dest); +static void close_destination_dir(DIR *dest_dir, char *dest); +static bool ArchiveWALFile(const char *file, const char *path); +static void RemoveWALFiles(void); +static XLogRecPtr process_replslots(void); +static bool read_repl_slot(const char *name, ReplicationSlotOnDisk *s_info); + +static void +usage(void) +{ + printf(_("%s deletes unneeded PostgreSQL Write-Ahead Log (WAL) files.\n\n"), progname); + printf(_("Usage:\n %s [OPTION]...\n\n"), progname); + printf(_("\nOptions:\n")); + printf(_(" -a, --archive-command=COMMAND archive command to execute and send WAL files before deletion\n")); + printf(_(" -D, --pgdata=DATADIR data directory\n")); + printf(_(" -d, --dry-run dry run, show the names of the files that would be removed\n")); + printf(_(" -i, --ignore-replication-slots ignore replication slots to calculate oldest WAL file\n" + " i.e. WAL files required by replication slots may be deleted\n")); + printf(_(" -v, --verbose output verbose messages\n")); + printf(_(" -V, --version output version information, then exit\n")); + printf(_(" -?, --help show this help, then exit\n")); + printf(_("\nIf no data directory (DATADIR) is specified, " + "the environment variable PGDATA\nis used.\n\n")); + printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); + printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); +} + +/* + * Get destination directory. + */ +static DIR * +get_destination_dir(char *dest) +{ + DIR *dir; + + Assert(dest != NULL); + dir = opendir(dest); + if (dir == NULL) + pg_fatal("could not open directory \"%s\": %m", dest); + + return dir; +} + +/* + * Close existing directory. + */ +static void +close_destination_dir(DIR *dest_dir, char *dest) +{ + Assert(dest_dir != NULL && dest != NULL); + if (closedir(dest_dir)) + pg_fatal("could not close directory \"%s\": %m", dest); +} + +/* + * Archives a given WAL file. + */ +static bool +ArchiveWALFile(const char *file, const char *path) +{ + char xlogarchcmd[MAXPGPATH]; + char *dp; + char *endp; + const char *sp; + int rc; + + /* construct the command to be executed */ + dp = xlogarchcmd; + endp = xlogarchcmd + MAXPGPATH - 1; + *endp = '\0'; + + for (sp = archive_command; *sp; sp++) + { + if (*sp == '%') + { + switch (sp[1]) + { + case 'p': + /* %p: relative path of source file */ + sp++; + strlcpy(dp, path, endp - dp); + make_native_path(dp); + dp += strlen(dp); + break; + case 'f': + /* %f: filename of source file */ + sp++; + strlcpy(dp, file, endp - dp); + dp += strlen(dp); + break; + case '%': + /* convert %% to a single % */ + sp++; + if (dp < endp) + *dp++ = *sp; + break; + default: + /* otherwise treat the % as not special */ + if (dp < endp) + *dp++ = *sp; + break; + } + } + else + { + if (dp < endp) + *dp++ = *sp; + } + } + *dp = '\0'; + + if (verbose) + pg_log_info("executing archive command \"%s\"", xlogarchcmd); + + rc = system(xlogarchcmd); + + if (rc != 0) + { + bool is_fatal = false; + + /* + * If either the shell itself, or a called command, died on a signal, + * abort the archiver. We do this because system() ignores SIGINT and + * SIGQUIT while waiting; so a signal is very likely something that + * should have interrupted us too. Also die if the shell got a hard + * "command not found" type of error. If we overreact it's no big + * deal, the postmaster will just start the archiver again. + */ + if (wait_result_is_any_signal(rc, true)) + is_fatal = true; + + if (WIFEXITED(rc)) + { + if (is_fatal) + pg_fatal("archive command \"%s\" failed with exit code %d", + xlogarchcmd, WEXITSTATUS(rc)); + else + pg_log_error("archive command \"%s\" failed with exit code %d", + xlogarchcmd, WEXITSTATUS(rc)); + } + else if (WIFSIGNALED(rc)) + { +#if defined(WIN32) + if (is_fatal) + { + pg_log_error("archive command \"%s\" was terminated by exception 0x%X", + xlogarchcmd, WTERMSIG(rc)); + pg_log_error("See C include file \"ntstatus.h\" for a description of the hexadecimal value."); + exit(EXIT_FAILURE); + } + else + { + pg_log_error("archive command \"%s\" was terminated by exception 0x%X", + xlogarchcmd, WTERMSIG(rc)); + pg_log_error("See C include file \"ntstatus.h\" for a description of the hexadecimal value."); + } +#else + if (is_fatal) + pg_fatal("archive command \"%s\" was terminated by signal %d: %s", + xlogarchcmd, WTERMSIG(rc), pg_strsignal(WTERMSIG(rc))); + else + pg_log_error("archive command \"%s\" was terminated by signal %d: %s", + xlogarchcmd, WTERMSIG(rc), pg_strsignal(WTERMSIG(rc))); +#endif + } + else + { + if (is_fatal) + pg_fatal("archive command \"%s\" exited with unrecognized status %d", + xlogarchcmd, rc); + else + pg_log_error("archive command \"%s\" exited with unrecognized status %d", + xlogarchcmd, rc); + } + + return false; + } + + if (verbose) + pg_log_info("archived write-ahead log file \"%s\"", file); + + return true; +} + +/* + * Removes all WAL file(s) older than the cut off WAL file name. + */ +static void +RemoveWALFiles(void) +{ + int rc; + DIR *xldir; + struct dirent *xlde; + int cnt = 0; + + xldir = get_destination_dir(PG_WAL_DIR); + + while (errno = 0, (xlde = readdir(xldir)) != NULL) + { + /* ignore files that are not XLOG segments */ + if (!IsXLogFileName(xlde->d_name) && + !IsPartialXLogFileName(xlde->d_name)) + continue; + + /* + * We ignore the timeline part of the XLOG segment identifiers in + * deciding whether a segment is still needed. This ensures that we + * won't prematurely remove a segment from a parent timeline. We could + * probably be a little more proactive about removing segments of + * non-parent timelines, but that would be a whole lot more + * complicated. + * + * We use the alphanumeric sorting property of the filenames to decide + * which ones are earlier than the cutoff_wal_file file. Note that this + * means files are not removed in the order they were originally + * written, in case this worries you. + */ + if (strcmp(xlde->d_name + 8, cutoff_wal_file + 8) < 0) + { + char path[MAXPGPATH]; + + snprintf(path, sizeof(path), "%s/%s/%s", data_dir, + PG_WAL_DIR, xlde->d_name); + + if (archive_command != NULL && !dryrun) + { + /* + * If archive_command is specified, just archive the WAL file + * irrespective of whether the PostgreSQL server has archived + * it or not (don't look at the .done file). + * + * This might cause the WAL file archived multiple times if + * PostgreSQL server and pg_walcleaner uses the same archive + * location. + */ + if (ArchiveWALFile(xlde->d_name, path)) + { + if (verbose) + pg_log_info("successfully archived WAL file %s", + xlde->d_name); + } + else + { + /* + * Let's just not remove the WAL file if ArchiveWALFile() + * couldn't archive it. Note that the ArchiveWALFile() + * would have already logged an error message. + */ + continue; + } + } + + if (archive_command == NULL) + { + char archiveStatusPath[MAXPGPATH]; + struct stat stat_buf; + + /* + * Check for .ready file --- this means the PostgreSQL server + * has not yet archived the WAL file. + */ + StatusFilePath(archiveStatusPath, xlde->d_name, ".ready"); + if (stat(archiveStatusPath, &stat_buf) == 0) + { + if (verbose) + pg_log_info("WAL file \"%s\" is yet to be archived by PostgreSQL server, hence not removing it", + path); + + continue; + } + } + + cnt++; + + if (dryrun) + { + /* + * Prints the name of the file to be removed and skips the + * actual removal. The regular printout is so that the user can + * pipe the output into some other program. + */ + printf("%s\n", path); + + if (verbose) + pg_log_info("file \"%s\" would be removed", path); + + continue; + } + + if (verbose) + pg_log_info("removing file \"%s\"", path); + + rc = unlink(path); + + if (rc != 0) + pg_fatal("could not remove file \"%s\": %m", path); + } + } + + if (errno) + pg_fatal("could not read directory \"%s\": %m", PG_WAL_DIR); + + close_destination_dir(xldir, PG_WAL_DIR); + + if (cnt == 0) + { + if (dryrun) + pg_log_info("no WAL files would be removed"); + else + pg_log_info("no WAL files were removed"); + } + else + { + if (dryrun) + pg_log_info("%d WAL file(s) would be removed", cnt); + else + pg_log_info("%d WAL file(s) were removed", cnt); + } +} + +/* + * Loops over all the existing replication slots and return the oldest + * restart_lsn. + */ +static XLogRecPtr +process_replslots(void) +{ + DIR *rsdir; + struct dirent *rsde; + uint32 cnt = 0; + XLogRecPtr oldest_restart_lsn; + + rsdir = get_destination_dir(PG_REPLSLOT_DIR); + + oldest_restart_lsn = InvalidXLogRecPtr; + + while (errno = 0, (rsde = readdir(rsdir)) != NULL) + { + struct stat statbuf; + char path[MAXPGPATH]; + ReplicationSlotOnDisk s_info; + bool s_read_success; + + if (strcmp(rsde->d_name, ".") == 0 || + strcmp(rsde->d_name, "..") == 0) + continue; + + snprintf(path, sizeof(path), + PG_REPLSLOT_DIR + "/%s", + rsde->d_name); + + /* we only care about directories here, skip if it's not one */ + if (lstat(path, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode)) + continue; + + /* we crashed while a slot was being setup or deleted, clean up */ + if (pg_str_endswith(rsde->d_name, ".tmp")) + { + if (verbose) + pg_log_info("server was crashed while the slot \"%s\" was being setup or deleted", + rsde->d_name); + + continue; + } + + /* looks like a slot in a normal state, decode its information */ + s_read_success = read_repl_slot(rsde->d_name, &s_info); + + if (!s_read_success) + continue; + + cnt++; + + if (cnt == 1) + { + /* first time */ + oldest_restart_lsn = s_info.slotdata.restart_lsn; + } + else if (!XLogRecPtrIsInvalid(s_info.slotdata.restart_lsn) && + s_info.slotdata.restart_lsn < oldest_restart_lsn) + { + oldest_restart_lsn = s_info.slotdata.restart_lsn; + } + } + + if (errno) + pg_fatal("could not read directory \"%s\": %m", PG_REPLSLOT_DIR); + + if (cnt == 0 && verbose) + pg_log_info("no replication slots were found"); + else if (cnt > 0 && verbose) + pg_log_info("oldest restart_lsn found is %X/%X", + LSN_FORMAT_ARGS(oldest_restart_lsn)); + + close_destination_dir(rsdir, PG_REPLSLOT_DIR); + + return oldest_restart_lsn; +} + +/* + * Reads given replication slot information from its disk file and return the + * contents. + */ +static bool +read_repl_slot(const char *name, ReplicationSlotOnDisk *s_info) +{ + ReplicationSlotOnDisk cp; + char slotdir[MAXPGPATH]; + char path[MAXPGPATH]; + int fd; + int readBytes; + pg_crc32c checksum; + + /* if temp file exists, just inform and continue further */ + sprintf(slotdir, PG_REPLSLOT_DIR"/%s", name); + sprintf(path, "%s/state.tmp", slotdir); + + fd = open(path, O_RDONLY | PG_BINARY, 0); + + if (fd > 0) + { + if (verbose) + pg_log_info("found temporary state file \"%s\": %m", path); + + if (close(fd) != 0) + pg_fatal("could not close file \"%s\": %m", path); + + return false; + } + + sprintf(path, "%s/state", slotdir); + + if (verbose) + pg_log_info("reading replication slot from \"%s\"", path); + + fd = open(path, O_RDONLY | PG_BINARY, 0); + + /* + * We do not need to handle this as we are rename()ing the directory into + * place only after we fsync()ed the state file. + */ + if (fd < 0) + pg_fatal("could not open file \"%s\": %m", path); + + if (verbose) + pg_log_info("reading version independent replication slot state file"); + + /* read part of statefile that's guaranteed to be version independent */ + readBytes = read(fd, &cp, ReplicationSlotOnDiskConstantSize); + + if (readBytes != ReplicationSlotOnDiskConstantSize) + { + if (readBytes < 0) + pg_fatal("could not read file \"%s\": %m", path); + else + pg_fatal("could not read file \"%s\": read %d of %zu", + path, readBytes, + (Size) ReplicationSlotOnDiskConstantSize); + } + + /* verify magic */ + if (cp.magic != SLOT_MAGIC) + pg_fatal("replication slot file \"%s\" has wrong magic number: %u instead of %u", + path, cp.magic, SLOT_MAGIC); + + /* verify version */ + if (cp.version != SLOT_VERSION) + pg_fatal("replication slot file \"%s\" has unsupported version %u", + path, cp.version); + + /* boundary check on length */ + if (cp.length != ReplicationSlotOnDiskV2Size) + pg_fatal("replication slot file \"%s\" has corrupted length %u", + path, cp.length); + + if (verbose) + pg_log_info("reading the entire replication slot state file"); + + /* now that we know the size, read the entire file */ + readBytes = read(fd, + (char *) &cp + ReplicationSlotOnDiskConstantSize, + cp.length); + + if (readBytes != cp.length) + { + if (readBytes < 0) + pg_fatal("could not read file \"%s\": %m", path); + else + pg_fatal("could not read file \"%s\": read %d of %zu", + path, readBytes, (Size) cp.length); + } + + if (close(fd) != 0) + pg_fatal("could not close file \"%s\": %m", path); + + /* now verify the CRC */ + INIT_CRC32C(checksum); + COMP_CRC32C(checksum, + (char *) &cp + ReplicationSlotOnDiskNotChecksummedSize, + ReplicationSlotOnDiskChecksummedSize); + FIN_CRC32C(checksum); + + if (!EQ_CRC32C(checksum, cp.checksum)) + pg_fatal("checksum mismatch for replication slot file \"%s\": is %u, should be %u", + path, checksum, cp.checksum); + + MemSet(s_info, 0, sizeof(ReplicationSlotOnDisk)); + memcpy(s_info, &cp, sizeof(ReplicationSlotOnDisk)); + + return true; +} + +int +main(int argc, char **argv) +{ + static struct option long_options[] = { + {"archive-command", required_argument, NULL, 'a'}, + {"dry-run", no_argument, NULL, 'd'}, + {"pgdata", required_argument, NULL, 'D'}, + {"ignore-replication-slots", no_argument, NULL, 'i'}, + {"verbose", no_argument, NULL, 'v'}, + {"version", no_argument, NULL, 'V'}, + {NULL, 0, NULL, 0} + }; + + int option; + int optindex = 0; + DIR *dir; + bool crc_ok; + XLogSegNo segno; + XLogRecPtr oldest_restart_lsn; + + pg_logging_init(argv[0]); + set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_walcleaner")); + progname = get_progname(argv[0]); + + if (argc > 1) + { + if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) + { + usage(); + exit(EXIT_SUCCESS); + } + if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) + { + puts("pg_walcleaner (PostgreSQL) " PG_VERSION); + exit(EXIT_SUCCESS); + } + } + + while ((option = getopt_long(argc, argv, "a:dD:v", + long_options, &optindex)) != -1) + { + switch (option) + { + case 'a': + archive_command = pstrdup(optarg); + break; + case 'd': + dryrun = true; + break; + case 'D': + data_dir = pstrdup(optarg); + break; + case 'i': + ignore_replication_slots = true; + break; + case 'v': + verbose = true; + break; + default: + goto bad_argument; + } + } + + /* any non-option arguments? */ + if (optind < argc) + { + pg_log_error("too many command-line arguments (first is \"%s\")", + argv[optind]); + goto bad_argument; + } + + if (data_dir == NULL) + { + data_dir = getenv("PGDATA"); + + /* If no data_dir was specified, and none could be found, error out */ + if (data_dir == NULL) + { + pg_log_error("no data directory specified"); + goto bad_argument; + } + } + + if (verbose) + pg_log_info("data directory is \"%s\"", data_dir); + + /* check existence of data directory */ + dir = get_destination_dir(data_dir); + close_destination_dir(dir, data_dir); + + if (chdir(data_dir) < 0) + pg_fatal("could not change directory to \"%s\": %m", data_dir); + + /* + * XXX: should we check if the server isn't running by looking at + * postmaster.pid file? Because we don't want this tool to be running while + * the server is up as it might interfere when checkpointer removing the + * old WAL files. But, just the presence of postmaster.pid file doesn't + * guarantee that the server is up as server can leave the postmaster.pid + * file in case of crashes (for instance, kill -9 <>). + * + * For now, let's leave the responsibility of running this tool only when + * the server isn't up to the user. + */ + + /* get a copy of the control file */ + ControlFile = get_controlfile(data_dir, &crc_ok); + if (!crc_ok) + { + pg_log_error("pg_control file contains invalid checksum\n"); + pg_log_error("Calculated CRC checksum does not match value stored in file.\n" + "Either the file is corrupt, or it has a different layout than this program\n" + "is expecting.\n\n"); + + exit(EXIT_FAILURE); + } + + /* set WAL segment size */ + WalSegSz = ControlFile->xlog_seg_size; + + if (!IsValidWalSegSize(WalSegSz)) + { + pg_log_error("invalid WAL segment size found in pg_control file\n"); + pg_log_error("The WAL segment size stored in the file, %d byte(s), is not a power of two\n" + "between 1 MB and 1 GB. The file is corrupt.\n\n", + WalSegSz); + + exit(EXIT_FAILURE); + } + + /* + * Calculate name of the WAL file containing the latest checkpoint's REDO + * start point. + */ + XLByteToSeg(ControlFile->checkPointCopy.redo, segno, WalSegSz); + XLogFileName(cutoff_wal_file, ControlFile->checkPointCopy.ThisTimeLineID, + segno, WalSegSz); + + if (verbose) + pg_log_info("checkpoint's REDO WAL file name is \"%s\"", + cutoff_wal_file); + + /* + * By default, keep WAL segments required by all the replication slots + * that were present at the time of crash. + * + * The replication slots must be dropped and recreated after the server + * is up when users choose to not keep WAL segments for them i.e. + * ignore_replication_slots is set to true. + */ + if (!ignore_replication_slots) + { + oldest_restart_lsn = process_replslots(); + + /* + * If oldest restart_lsn is older than checkpoint's REDO start point, + * then re-compute the cutoff_wal_file. + */ + if (!XLogRecPtrIsInvalid(oldest_restart_lsn) && + oldest_restart_lsn < ControlFile->checkPointCopy.redo) + { + XLByteToSeg(oldest_restart_lsn, segno, WalSegSz); + + /* + * XXX: will any of replication slots clients stream from a + * timeline other than the server's insert timeline? How do we find + * the server's insert timeline id just before the crash? + * + * Using ThisTimeLineID from checkpoint may not be correct here, + * but we anyways don't use timeline id while deleting the WAL + * files, see comments in RemoveWALFiles(). + */ + XLogFileName(cutoff_wal_file, + ControlFile->checkPointCopy.ThisTimeLineID, + segno, WalSegSz); + + if (verbose) + pg_log_info("oldest WAL file required by all replication slots is \"%s\"", + cutoff_wal_file); + } + } + + /* + * XXX: should we consider the WAL files on historical timelines as well? + * PrevTimeLineID, timeline history files etc.? + */ + + /* + * XXX: should we consider deleting the partial WAL files, backup history + * files as well on standbys or PITR/restored server? + */ + + pg_log_info("keeping WAL file \"%s\" and later", cutoff_wal_file); + + /* remove WAL files older than cutoff_wal_file */ + RemoveWALFiles(); + + return EXIT_SUCCESS; + +bad_argument: + pg_log_error_hint("Try \"%s --help\" for more information.", progname); + return EXIT_FAILURE; +} diff --git a/src/bin/pg_walcleaner/repl_slot.h b/src/bin/pg_walcleaner/repl_slot.h new file mode 100644 index 0000000000..5a785c3eb9 --- /dev/null +++ b/src/bin/pg_walcleaner/repl_slot.h @@ -0,0 +1,137 @@ +/*------------------------------------------------------------------------- + * repl_slot.h + * Replication slot data structures required for pg_walcleaner tool. + * + * Copyright (c) 2022, PostgreSQL Global Development Group + * + *------------------------------------------------------------------------- + */ +#ifndef REPL_SLOT_H +#define REPL_SLOT_H + +/* + * NOTE: All of these structures are borrowed as-is from replication/slot.c and + * replication/slot.h. Don't forget to keep both of them in sync. + */ + +/* + * Behaviour of replication slots, upon release or crash. + * + * Slots marked as PERSISTENT are crash-safe and will not be dropped when + * released. Slots marked as EPHEMERAL will be dropped when released or after + * restarts. Slots marked TEMPORARY will be dropped at the end of a session + * or on error. + * + * EPHEMERAL is used as a not-quite-ready state when creating persistent + * slots. EPHEMERAL slots can be made PERSISTENT by calling + * ReplicationSlotPersist(). For a slot that goes away at the end of a + * session, TEMPORARY is the appropriate choice. + */ +typedef enum ReplicationSlotPersistency +{ + RS_PERSISTENT, + RS_EPHEMERAL, + RS_TEMPORARY +} ReplicationSlotPersistency; + +/* + * On-Disk data of a replication slot, preserved across restarts. + */ +typedef struct ReplicationSlotPersistentData +{ + /* The slot's identifier */ + NameData name; + + /* database the slot is active on */ + Oid database; + + /* + * The slot's behaviour when being dropped (or restored after a crash). + */ + ReplicationSlotPersistency persistency; + + /* + * xmin horizon for data + * + * NB: This may represent a value that hasn't been written to disk yet; + * see notes for effective_xmin, below. + */ + TransactionId xmin; + + /* + * xmin horizon for catalog tuples + * + * NB: This may represent a value that hasn't been written to disk yet; + * see notes for effective_xmin, below. + */ + TransactionId catalog_xmin; + + /* oldest LSN that might be required by this replication slot */ + XLogRecPtr restart_lsn; + + /* restart_lsn is copied here when the slot is invalidated */ + XLogRecPtr invalidated_at; + + /* + * Oldest LSN that the client has acked receipt for. This is used as the + * start_lsn point in case the client doesn't specify one, and also as a + * safety measure to jump forwards in case the client specifies a + * start_lsn that's further in the past than this value. + */ + XLogRecPtr confirmed_flush; + + /* + * LSN at which we enabled two_phase commit for this slot or LSN at which + * we found a consistent point at the time of slot creation. + */ + XLogRecPtr two_phase_at; + + /* + * Allow decoding of prepared transactions? + */ + bool two_phase; + + /* plugin name */ + NameData plugin; +} ReplicationSlotPersistentData; + +/* + * Replication slot on-disk data structure. + */ +typedef struct ReplicationSlotOnDisk +{ + /* first part of this struct needs to be version independent */ + + /* data not covered by checksum */ + uint32 magic; + pg_crc32c checksum; + + /* data covered by checksum */ + uint32 version; + uint32 length; + + /* + * The actual data in the slot that follows can differ based on the above + * 'version'. + */ + + ReplicationSlotPersistentData slotdata; +} ReplicationSlotOnDisk; + +/* size of version independent data */ +#define ReplicationSlotOnDiskConstantSize \ + offsetof(ReplicationSlotOnDisk, slotdata) +/* size of the part of the slot not covered by the checksum */ +#define ReplicationSlotOnDiskNotChecksummedSize \ + offsetof(ReplicationSlotOnDisk, version) +/* size of the part covered by the checksum */ +#define ReplicationSlotOnDiskChecksummedSize \ + sizeof(ReplicationSlotOnDisk) - ReplicationSlotOnDiskNotChecksummedSize +/* size of the slot data that is version dependent */ +#define ReplicationSlotOnDiskV2Size \ + sizeof(ReplicationSlotOnDisk) - ReplicationSlotOnDiskConstantSize + +#define SLOT_MAGIC 0x1051CA1 /* format identifier */ +#define SLOT_VERSION 2 /* version for new files */ + +#endif /* REPL_SLOT_H */ \ No newline at end of file diff --git a/src/bin/pg_walcleaner/t/001_basic.pl b/src/bin/pg_walcleaner/t/001_basic.pl new file mode 100644 index 0000000000..a220ad830a --- /dev/null +++ b/src/bin/pg_walcleaner/t/001_basic.pl @@ -0,0 +1,14 @@ + +# Copyright (c) 2022, PostgreSQL Global Development Group + +use strict; +use warnings; + +use PostgreSQL::Test::Utils; +use Test::More; + +program_help_ok('pg_walcleaner'); +program_version_ok('pg_walcleaner'); +program_options_handling_ok('pg_walcleaner'); + +done_testing(); -- 2.25.1