From 6b682ec934728a97b6458709b4b161772d5cfa65 Mon Sep 17 00:00:00 2001 From: Maxim Orlov Date: Wed, 14 Dec 2022 15:24:21 +0300 Subject: [PATCH v51 3/3] =?UTF-8?q?Make=20pg=5Fupgrade=20from=2032-bit=20t?= =?UTF-8?q?o=2064=E2=80=93bit=20SLRU?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Author: Alexander Korotkov Author: Teodor Sigaev Author: Nikita Glukhov Author: Maxim Orlov Author: Pavel Borisov Author: Yura Sokolov Author: Aleksander Alekseev Reviewed-by: Aleksander Alekseev Discussion: https://postgr.es/m/CACG%3DezZe1NQSCnfHOr78AtAZxJZeCvxrts0ygrxYwe%3DpyyjVWA%40mail.gmail.com Discussion: https://postgr.es/m/CAJ7c6TPDOYBYrnCAeyndkBktO0WG2xSdYduTF0nxq%2BvfkmTF5Q%40mail.gmail.com --- src/bin/pg_upgrade/pg_upgrade.c | 292 +++++++++++++++++++++++++++++++- src/bin/pg_upgrade/pg_upgrade.h | 5 + 2 files changed, 293 insertions(+), 4 deletions(-) diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index 115faa222e..0cedc05416 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -44,6 +44,7 @@ #include #endif +#include "access/transam.h" #include "catalog/pg_class_d.h" #include "common/file_perm.h" #include "common/logging.h" @@ -566,6 +567,277 @@ copy_subdir_files(const char *old_subdir, const char *new_subdir) check_ok(); } +/******************************************************************************/ +#define SLRU_PAGES_PER_SEGMENT 32 /* Should be equal to value from slru.h */ + +#define CLOG_BITS_PER_XACT 2 +#define CLOG_XACTS_PER_BYTE 4 +#define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE) +/* + * Rename the files from the old cluster into it + */ +typedef struct SLRUSegmentState +{ + const char *dir; + FILE *file; + int64 segno; + int64 pageno; + bool is_empty_segment; +} SLRUSegmentState; + +static char * +slru_filename_old(const char *path, int64 segno) +{ + Assert(segno <= PG_INT32_MAX); + return psprintf("%s/%04X", path, (int) segno); +} + +static char * +slru_filename_new(const char *path, int64 segno) +{ + return psprintf("%s/%012llX", path, (long long) segno); +} + +static inline FILE * +open_file(SLRUSegmentState *state, + char * (filename_fn)(const char *path, int64 segno), + char *mode, char *fatal_msg) +{ + char *filename = filename_fn(state->dir, state->segno); + FILE *fd = fopen(filename, mode); + + if (!fd) + pg_fatal(fatal_msg, filename); + + pfree(filename); + + return fd; +} + +static void +close_file(SLRUSegmentState *state, + char * (filename_fn)(const char *path, int64 segno)) +{ + if (state->file != NULL) + { + if (fclose(state->file) != 0) + pg_fatal("could not close file \"%s\": %m", + filename_fn(state->dir, state->segno)); + state->file = NULL; + } +} + +static inline int +read_file(SLRUSegmentState *state, void *buf) +{ + size_t n = fread(buf, sizeof(char), BLCKSZ, state->file); + + if (n != 0) + return n; + + if (ferror(state->file)) + pg_fatal("could not read file \"%s\": %m", + slru_filename_old(state->dir, state->segno)); + + if (!feof(state->file)) + pg_fatal("unknown file read state \"%s\": %m", + slru_filename_old(state->dir, state->segno)); + + close_file(state, slru_filename_old); + + return 0; +} + +static int +read_old_segment_page(SLRUSegmentState *state, void *buf, bool *is_empty) +{ + int n; + + /* Open next segment file, if needed */ + if (!state->file) + { + state->file = open_file(state, slru_filename_old, "rb", + "could not open source file \"%s\": %m"); + + /* Set position to the needed page */ + if (fseek(state->file, state->pageno * BLCKSZ, SEEK_SET)) + close_file(state, slru_filename_old); + + /* + * Skip segment conversion if segment file doesn't exist. + * First segment file should exist in any case. + */ + if (state->segno != 0) + state->is_empty_segment = true; + } + + if (state->file) + { + /* Segment file does exist, read page from it */ + state->is_empty_segment = false; + + /* Try to read BLCKSZ bytes */ + n = read_file(state, buf); + *is_empty = (n == 0); + + /* Zeroing buf tail if needed */ + if (n) + memset((char *) buf + n, 0, BLCKSZ - n); + } + else + { + n = state->is_empty_segment ? + BLCKSZ : /* Skip empty block at the end of segment */ + 0; /* We reached the last segment */ + *is_empty = true; + + if (n) + memset((char *) buf, 0, BLCKSZ); + } + + state->pageno++; + + if (state->pageno >= SLRU_PAGES_PER_SEGMENT) + { + /* Start new segment */ + state->segno++; + state->pageno = 0; + close_file(state, slru_filename_old); + } + + return n; +} + +static void +write_new_segment_page(SLRUSegmentState *state, void *buf, bool is_empty) +{ + /* + * Create a new segment file if we still didn't. Creation is postponed + * until the first non-empty page is found. This helps not to create + * completely empty segments. + */ + if (!state->file && !is_empty) + { + state->file = open_file(state, slru_filename_new, "wb", + "could not open target file \"%s\": %m"); + + /* Write zeroes to the previously skipped prefix */ + if (state->pageno > 0) + { + char zerobuf[BLCKSZ] = {0}; + + for (int64 i = 0; i < state->pageno; i++) + { + if (fwrite(zerobuf, sizeof(char), BLCKSZ, state->file) != BLCKSZ) + pg_fatal("could not write file \"%s\": %m", + slru_filename_new(state->dir, state->segno)); + } + } + + } + + /* Write page to the new segment (if it was created) */ + if (state->file) + { + if (fwrite(buf, sizeof(char), BLCKSZ, state->file) != BLCKSZ) + pg_fatal("could not write file \"%s\": %m", + slru_filename_new(state->dir, state->segno)); + } + + state->pageno++; + + /* + * Did we reach the maximum page number? Then close segment file and + * create a new one on the next iteration + */ + if (state->pageno >= SLRU_PAGES_PER_SEGMENT) + { + state->segno++; + state->pageno = 0; + close_file(state, slru_filename_new); + } +} + +static void +convert_pg_xact_segments(const char *old_subdir, const char *new_subdir) +{ + SLRUSegmentState oldseg = {0}; + SLRUSegmentState newseg = {0}; + char buf[BLCKSZ] = {0}; + FullTransactionId oldestxid; + FullTransactionId nxtxid; + uint32 epoch; + int64 pageno; + uint64 xid; + + oldseg.dir = old_subdir; + newseg.dir = new_subdir; + + /* wraparound without epoch is not possible */ + if (old_cluster.controldata.chkpnt_nxtepoch == 0 && + old_cluster.controldata.chkpnt_oldstxid > old_cluster.controldata.chkpnt_nxtxid) + { + pg_fatal("inconsistent pg_xact of directory \"%s\"", + old_cluster.pgdata); + } + + /* get full transactions bounds from old cluster */ + epoch = old_cluster.controldata.chkpnt_nxtepoch; + nxtxid = FullTransactionIdFromEpochAndXid(epoch, + old_cluster.controldata.chkpnt_nxtxid); + if (old_cluster.controldata.chkpnt_oldstxid > XidFromFullTransactionId(nxtxid)) + --epoch; + + oldestxid = FullTransactionIdFromEpochAndXid(epoch, + old_cluster.controldata.chkpnt_oldstxid); + + /* get init segments and pages */ + pageno = oldestxid.value / CLOG_XACTS_PER_PAGE; + + oldseg.segno = pageno / SLRU_PAGES_PER_SEGMENT; + oldseg.pageno = pageno % SLRU_PAGES_PER_SEGMENT; + + newseg.segno = pageno / SLRU_PAGES_PER_SEGMENT; + newseg.pageno = pageno % SLRU_PAGES_PER_SEGMENT; + + /* Copy xid flags reading only needed segment pages */ + for (xid = oldestxid.value & ~(CLOG_XACTS_PER_PAGE - 1); + xid <= ((nxtxid.value - 1) & ~(CLOG_XACTS_PER_PAGE - 1)); + xid += CLOG_XACTS_PER_PAGE) + { + bool is_empty; + int len; + + /* Handle possible segment wraparound */ + if (oldseg.segno > MaxTransactionId / CLOG_XACTS_PER_PAGE / SLRU_PAGES_PER_SEGMENT) + { + Assert(!oldseg.pageno); + Assert(!oldseg.file); + Assert(!newseg.pageno); + Assert(!newseg.file); + + oldseg.segno = 0; + } + + len = read_old_segment_page(&oldseg, buf, &is_empty); + + /* + * Ignore read errors, copy all existing segment pages in the + * interesting xid range. + */ + is_empty |= len <= 0; + + Assert(len >= 0); + Assert(is_empty == false); + + write_new_segment_page(&newseg, buf, is_empty); + } + + /* Release resources */ + close_file(&oldseg, slru_filename_old); + close_file(&newseg, slru_filename_new); +} + static void copy_xact_xlog_xid(void) { @@ -573,10 +845,22 @@ copy_xact_xlog_xid(void) * Copy old commit logs to new data dir. pg_clog has been renamed to * pg_xact in post-10 clusters. */ - copy_subdir_files(GET_MAJOR_VERSION(old_cluster.major_version) <= 906 ? - "pg_clog" : "pg_xact", - GET_MAJOR_VERSION(new_cluster.major_version) <= 906 ? - "pg_clog" : "pg_xact"); + if (old_cluster.controldata.cat_ver < SLRU_FORMAT_CHANGE_CAT_VER) + { +#define GetClogDirName(cluster) \ + GET_MAJOR_VERSION(cluster.major_version) <= 906 ? "pg_clog" : "pg_xact" + char *old_path = psprintf("%s/%s", old_cluster.pgdata, GetClogDirName(old_cluster)); + char *new_path = psprintf("%s/%s", new_cluster.pgdata, GetClogDirName(new_cluster)); + + convert_pg_xact_segments(old_path, new_path); + pfree(old_path); + pfree(new_path); + } + else + copy_subdir_files(GET_MAJOR_VERSION(old_cluster.major_version) <= 906 ? + "pg_clog" : "pg_xact", + GET_MAJOR_VERSION(new_cluster.major_version) <= 906 ? + "pg_clog" : "pg_xact"); prep_status("Setting oldest XID for new cluster"); exec_prog(UTILITY_LOG_FILE, NULL, true, true, diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 31589b0fdc..0cfafc8810 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -126,6 +126,11 @@ extern char *output_files[]; */ #define JSONB_FORMAT_CHANGE_CAT_VER 201409291 +/* + * change in SLRU numbering to 64–bit + */ +#define SLRU_FORMAT_CHANGE_CAT_VER 202212092 + /* * Each relation is represented by a relinfo structure. -- 2.38.1