From 16456473c61537c5f8c7689a6dac340be6b84c43 Mon Sep 17 00:00:00 2001
From: Mats Kindahl
Date: Thu, 30 Apr 2026 07:05:36 +0200
Subject: pg_rewind: use UUIDs to detect independent same-TLI promotions

Two PostgreSQL standbys can independently promote to the same timeline
ID if their primary stopped before either had a chance to promote.  In
that situation both clusters share a timeline history prefix that looks
identical to pg_rewind: same TLI numbers and same begin/end LSNs.  The
existing same-TLI shortcut therefore treated the source as a valid
rewind target and skipped the rewind entirely, leaving the target's
diverged WAL intact.

Fix this by embedding a UUIDv7 value in every timeline history file
entry at promotion time.  Each promotion generates a fresh UUID, so two
independent promotions to the same TLI will carry different UUIDs even
though the TLI number and begin LSN are identical.

When loading the timeline history, pg_rewind uses these UUIDs in two
places:

1. findCommonAncestorTimeline checks that the TLI and UUID in each
   entry match.  A mismatch signals independent promotions, so the scan
   stops there and the last entry that still matches is used as the
   common ancestor.

2. The same-TLI shortcut (source and target on the same current TLI)
   compares the UUID stored in the last completed history entry; a
   mismatch forces a full rewind instead of a no-op.

UUIDs are zero for clusters that predate this change, and the
comparison function treats a zero UUID on either side as "unknown /
compatible", so the new code is fully backward-compatible with old
history files.

A new test in t/005_same_timeline.pl covers the same-TLI shortcut case:
two standbys independently promote to TLI 2, each with a distinct UUID.
--- src/backend/access/transam/timeline.c | 49 ++++++++- src/backend/access/transam/xlog.c | 40 +++++++- src/backend/utils/adt/uuid.c | 14 ++- src/bin/pg_rewind/pg_rewind.c | 120 +++++++++++++++++++++-- src/bin/pg_rewind/t/005_same_timeline.pl | 87 ++++++++++++++++ src/bin/pg_rewind/timeline.c | 47 ++++++++- src/include/access/timeline.h | 5 +- src/include/access/xlog_internal.h | 1 + src/include/utils/uuid.h | 10 +- 9 files changed, 349 insertions(+), 24 deletions(-) diff --git a/src/backend/access/transam/timeline.c b/src/backend/access/transam/timeline.c index 68e5f692d26..bc768efa8a6 100644 --- a/src/backend/access/transam/timeline.c +++ b/src/backend/access/transam/timeline.c @@ -42,6 +42,8 @@ #include "pgstat.h" #include "storage/fd.h" #include "utils/wait_event.h" +#include "utils/fmgrprotos.h" +#include "utils/uuid.h" /* * Copies all timeline history files with id's between 'begin' and 'end' @@ -114,6 +116,7 @@ readTimeLineHistory(TimeLineID targetTLI) entry = palloc_object(TimeLineHistoryEntry); entry->tli = targetTLI; entry->begin = entry->end = InvalidXLogRecPtr; + memset(&entry->tluuid, 0, sizeof(pg_uuid_t)); return list_make1(entry); } @@ -125,6 +128,7 @@ readTimeLineHistory(TimeLineID targetTLI) prevend = InvalidXLogRecPtr; for (;;) { + char uuid_str[UUID_STR_LEN + 1] = {0}; char fline[MAXPGPATH]; char *res; char *ptr; @@ -155,7 +159,8 @@ readTimeLineHistory(TimeLineID targetTLI) if (*ptr == '\0' || *ptr == '#') continue; - nfields = sscanf(fline, "%u\t%X/%08X", &tli, &switchpoint_hi, &switchpoint_lo); + nfields = + sscanf(fline, "%u\t%X/%08X\t%36s", &tli, &switchpoint_hi, &switchpoint_lo, uuid_str); if (nfields < 1) { @@ -164,7 +169,7 @@ readTimeLineHistory(TimeLineID targetTLI) (errmsg("syntax error in history file: %s", fline), errhint("Expected a numeric timeline ID."))); } - if (nfields != 3) + if (nfields < 3) ereport(FATAL, (errmsg("syntax error in history file: %s", fline), errhint("Expected a write-ahead log switchpoint location."))); @@ 
-182,6 +187,23 @@ readTimeLineHistory(TimeLineID targetTLI) entry->end = ((uint64) (switchpoint_hi)) << 32 | (uint64) switchpoint_lo; prevend = entry->end; + /* + * Parse the optional UUID field. Old history files have the + * reason string in field 4. It is in theory possible that the + * reason string starts with a UUID, but the current usage do + * not store a UUID. This allows us to support both old and new + * formats of history files without breaking compatibility by + * checking if the field contains a valid UUID. + */ + memset(&entry->tluuid, 0, sizeof(pg_uuid_t)); + if (nfields == 4 && strlen(uuid_str) == UUID_STR_LEN) + { + Datum datum = DirectFunctionCall1(uuid_in, CStringGetDatum(uuid_str)); + pg_uuid_t *up = DatumGetUUIDP(datum); + + memcpy(&entry->tluuid, up, sizeof(pg_uuid_t)); + } + /* Build list with newest item first */ result = lcons(entry, result); @@ -203,6 +225,7 @@ readTimeLineHistory(TimeLineID targetTLI) entry->tli = targetTLI; entry->begin = prevend; entry->end = InvalidXLogRecPtr; + memset(&entry->tluuid, 0, sizeof(pg_uuid_t)); result = lcons(entry, result); @@ -294,21 +317,33 @@ findNewestTimeLine(TimeLineID startTLI) * * newTLI: ID of the new timeline * parentTLI: ID of its immediate parent + * newTLUUID: UUID uniquely identifying this promotion instance * switchpoint: WAL location where the system switched to the new timeline * reason: human-readable explanation of why the timeline was switched * - * Currently this is only used at the end recovery, and so there are no locking + * The output file is named .history (e.g. 00000003.history). If two + * servers independently promote to the same timeline ID, their history files + * share the same name. In a shared WAL archive the second file to arrive + * silently overwrites the first. 
The newTLUUID written into the file content + * lets pg_rewind detect this collision: it fetches each server's history file + * directly from that server, compares the UUIDs for every shared TLI, and + * treats a UUID mismatch as evidence of independent promotion even when the + * TLI numbers agree. + * + * Currently this is only used at end of recovery, and so there are no locking * considerations. But we should be just as tense as XLogFileInit to avoid * emplacing a bogus file. */ void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, + const pg_uuid_t *newTLUUID, XLogRecPtr switchpoint, char *reason) { char path[MAXPGPATH]; char tmppath[MAXPGPATH]; char histfname[MAXFNAMELEN]; char buffer[BLCKSZ]; + char *uuid_str; int srcfd; int fd; int nbytes; @@ -398,13 +433,19 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, * * If we did have a parent file, insert an extra newline just in case the * parent file failed to end with one. + * + * Format: \t\t\t\n */ + uuid_str = DatumGetCString(DirectFunctionCall1(uuid_out, UUIDPGetDatum(newTLUUID))); + snprintf(buffer, sizeof(buffer), - "%s%u\t%X/%08X\t%s\n", + "%s%u\t%X/%08X\t%s\t%s\n", (srcfd < 0) ? "" : "\n", parentTLI, LSN_FORMAT_ARGS(switchpoint), + uuid_str, reason); + pfree(uuid_str); nbytes = strlen(buffer); errno = 0; diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index e39af79c03b..586d996c56f 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -99,6 +99,7 @@ #include "storage/subsystems.h" #include "storage/sync.h" #include "utils/guc_hooks.h" +#include "utils/uuid.h" #include "utils/guc_tables.h" #include "utils/injection_point.h" #include "utils/pgstat_internal.h" @@ -515,6 +516,13 @@ typedef struct XLogCtlData TimeLineID InsertTimeLineID; TimeLineID PrevTimeLineID; + /* + * UUID for the current promotion. 
Generated when the timeline history + * file is written and later embedded in the XLOG_END_OF_RECOVERY record. + * Protected by info_lck. + */ + pg_uuid_t ThisTimeLineUUID; + /* * SharedRecoveryState indicates if we're still in crash or archive * recovery. Protected by info_lck. @@ -6377,6 +6385,9 @@ StartupXLOG(void) newTLI = endOfRecoveryInfo->lastRecTLI; if (ArchiveRecoveryRequested) { + TimestampTz now = GetCurrentTimestamp(); + pg_uuid_t uuid_buf; + newTLI = findNewestTimeLine(recoveryTargetTLI) + 1; ereport(LOG, (errmsg("selected new timeline ID: %u", newTLI))); @@ -6407,8 +6418,27 @@ StartupXLOG(void) * to the new timeline, and will try to connect to the new timeline. * To minimize the window for that, try to do as little as possible * between here and writing the end-of-recovery record. + * + * Generate a UUIDv7 that uniquely identifies this promotion. The + * same UUID is written into the history file and later into the + * XLOG_END_OF_RECOVERY record so that pg_rewind can distinguish two + * servers that independently promoted to the same timeline ID. */ + + + /* + * TimestampTz is microseconds; generate_uuidv7 wants ms + sub-ms. We + * generate the UUID outside the spinlock, to avoid doing the relatively + * expensive UUID generation, which could involve unexpected delays, + * while holding the spinlock. 
+		 */
+
+		/*
+		 * GetCurrentTimestamp() counts microseconds from the PostgreSQL epoch
+		 * (2000-01-01), but UUIDv7 embeds a Unix-epoch timestamp.  Shift the
+		 * value first; from here on "now" holds Unix-epoch microseconds, which
+		 * are then split into milliseconds and the sub-millisecond remainder.
+		 */
+		now += (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
+		generate_uuidv7_r(&uuid_buf, (uint64) (now / 1000), (uint32) (now % 1000) * 1000);
+		SpinLockAcquire(&XLogCtl->info_lck);
+		memcpy(&XLogCtl->ThisTimeLineUUID, &uuid_buf, sizeof(pg_uuid_t));
+		SpinLockRelease(&XLogCtl->info_lck);
+
 		writeTimeLineHistory(newTLI, recoveryTargetTLI,
+							 &uuid_buf,
 							 EndOfLog, endOfRecoveryInfo->recoveryStopReason);
 
 		ereport(LOG,
@@ -9042,8 +9072,16 @@ xlog_redo(XLogReaderState *record)
 	{
 		xl_end_of_recovery xlrec;
 		TimeLineID	replayTLI;
+		uint32		rec_len;
 
-		memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
+		/*
+		 * Zero the struct first so that old records without UUID fields
+		 * produce all-zero UUIDs, which pg_rewind treats as "unknown".
+		 */
+		memset(&xlrec, 0, sizeof(xl_end_of_recovery));
+		rec_len = XLogRecGetDataLen(record);
+		memcpy(&xlrec, XLogRecGetData(record),
+			   Min(rec_len, sizeof(xl_end_of_recovery)));
 
 		/*
 		 * For Hot Standby, we could treat this like a Shutdown Checkpoint,
diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
index 6ee3752ac78..8dc098d11e3 100644
--- a/src/backend/utils/adt/uuid.c
+++ b/src/backend/utils/adt/uuid.c
@@ -72,7 +72,7 @@ static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
 static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
 static inline void uuid_set_version(pg_uuid_t *uuid, unsigned char version);
 static inline int64 get_real_time_ns_ascending(void);
-static pg_uuid_t *generate_uuidv7(uint64 unix_ts_ms, uint32 sub_ms);
+pg_uuid_t *generate_uuidv7(uint64 unix_ts_ms, uint32 sub_ms);
 
 Datum
 uuid_in(PG_FUNCTION_ARGS)
@@ -581,6 +581,17 @@ get_real_time_ns_ascending(void)
 	return ns;
 }
 
+/*
+ * Convenience wrapper around generate_uuidv7_r() that supplies a freshly
+ * palloc'd result buffer; the arguments are passed through unchanged.
+ */
+pg_uuid_t *
+generate_uuidv7(uint64 unix_ts_ms, uint32 sub_ms)
+{
+	pg_uuid_t  *uuid = palloc(UUID_LEN);
+
+	return generate_uuidv7_r(uuid, unix_ts_ms, sub_ms);
+}
+
 /*
  * Generate UUID version 7 per RFC 9562, with the given timestamp.
* @@ -597,10 +604,9 @@ get_real_time_ns_ascending(void) * * NB: all numbers here are unsigned, unix_ts_ms cannot be negative per RFC. */ -static pg_uuid_t * -generate_uuidv7(uint64 unix_ts_ms, uint32 sub_ms) +pg_uuid_t * +generate_uuidv7_r(pg_uuid_t *uuid, uint64 unix_ts_ms, uint32 sub_ms) { - pg_uuid_t *uuid = palloc(UUID_LEN); uint32 increased_clock_precision; /* Fill in time part */ diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index 9d745d4b25b..b34f62bf968 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -32,6 +32,19 @@ #include "rewind_source.h" #include "storage/bufpage.h" +/* + * Timeline histories for both clusters, populated by timelines_match(). + */ +typedef struct TimelineHistoriesData +{ + TimeLineHistoryEntry *source, + *target; + int sourceNentries, + targetNentries; +} TimelineHistoriesData; + +typedef TimelineHistoriesData * TimelineHistories; + static void usage(const char *progname); static void perform_rewind(filemap_t *filemap, rewind_source *source, @@ -53,6 +66,9 @@ static void findCommonAncestorTimeline(TimeLineHistoryEntry *a_history, TimeLineHistoryEntry *b_history, int b_nentries, XLogRecPtr *recptr, int *tliIndex); +static inline bool matchingTimelineUUID(TimeLineHistoryEntry *a, TimeLineHistoryEntry *b); +static bool matchAndFetchTimelines(TimeLineID source_tli, TimeLineID target_tli, + TimelineHistories timelineHistories); static void ensureCleanShutdown(const char *argv0); static void disconnect_atexit(void); @@ -141,6 +157,7 @@ main(int argc, char **argv) int c; XLogRecPtr divergerec; int lastcommontliIndex; + TimelineHistoriesData timelineHistories; XLogRecPtr chkptrec; TimeLineID chkpttli; XLogRecPtr chkptredo; @@ -372,10 +389,20 @@ main(int argc, char **argv) * * If both clusters are already on the same timeline, there's nothing to * do. 
+ * + * This also handles the case when two servers independently promoted to the + * same timeline ID: one crashed after writing the history file but before + * its EOR WAL record was distributed, so a second standby promoted + * independently. The history files produced by those two promotions carry + * different UUIDs. + * + * When the clusters are on different timelines we locate the fork point via + * findCommonAncestorTimeline. */ - if (target_tli == source_tli) + if (matchAndFetchTimelines(source_tli, target_tli, &timelineHistories)) { pg_log_info("source and target cluster are on the same timeline"); + pfree(timelineHistories.source); rewind_needed = false; target_wal_endrec = InvalidXLogRecPtr; } @@ -389,8 +416,10 @@ main(int argc, char **argv) * Retrieve timelines for both source and target, and find the point * where they diverged. */ - sourceHistory = getTimelineHistory(source_tli, true, &sourceNentries); - targetHistory = getTimelineHistory(target_tli, false, &targetNentries); + targetHistory = timelineHistories.target; + targetNentries = timelineHistories.targetNentries; + sourceHistory = timelineHistories.source; + sourceNentries = timelineHistories.sourceNentries; findCommonAncestorTimeline(sourceHistory, sourceNentries, targetHistory, targetNentries, @@ -874,7 +903,7 @@ getTimelineHistory(TimeLineID tli, bool is_source, int *nentries) */ if (tli == 1) { - history = pg_malloc_object(TimeLineHistoryEntry); + history = pg_malloc0_object(TimeLineHistoryEntry); history->tli = tli; history->begin = history->end = InvalidXLogRecPtr; *nentries = 1; @@ -920,6 +949,64 @@ getTimelineHistory(TimeLineID tli, bool is_source, int *nentries) return history; } +/* + * Return true if two per-entry promotion UUIDs are compatible. + * + * A zero UUID means the history file predates this fix (or the entry is + * synthetic). Zero on either side means "unknown; treat as matching" so + * that pg_rewind degrades gracefully when rewinding against an old server. 
+ */
+static inline bool
+matchingTimelineUUID(TimeLineHistoryEntry *a, TimeLineHistoryEntry *b)
+{
+	static const pg_uuid_t zero = {{0}};
+
+	/* An all-zero UUID on either side means "unknown": assume compatible. */
+	if (memcmp(&a->tluuid, &zero, UUID_LEN) == 0 ||
+		memcmp(&b->tluuid, &zero, UUID_LEN) == 0)
+		return true;
+
+	return memcmp(&a->tluuid, &b->tluuid, UUID_LEN) == 0;
+}
+
+/*
+ * Fetch the timeline history for both clusters, store them in tlh, and return
+ * true if the clusters are on the same timeline (no rewind needed).
+ *
+ * tlh is always fully populated on return regardless of the result, so the
+ * caller can pass tlh->source / tlh->target directly to
+ * findCommonAncestorTimeline() when the return value is false.
+ *
+ * Different TLIs always yield false.  With equal TLIs, a history of fewer
+ * than two entries (TLI 1, the original timeline) has no promotion UUID and
+ * always yields true.  For TLI 2 and above, the UUID in entry[Nentries - 2]
+ * identifies the promotion that created the current TLI; the clusters are on
+ * the same timeline only if matchingTimelineUUID() accepts those two entries,
+ * which means a zero UUID (old history file or synthetic entry) on either
+ * side is treated as matching.
+ */
+static bool
+matchAndFetchTimelines(TimeLineID source_tli, TimeLineID target_tli, TimelineHistories tlh)
+{
+	tlh->source = getTimelineHistory(source_tli, true, &tlh->sourceNentries);
+	tlh->target = getTimelineHistory(target_tli, false, &tlh->targetNentries);
+
+	if (source_tli != target_tli)
+		return false;
+
+	/* TLI 1 has no promotion UUID; always treat as the same timeline. */
+	if (tlh->sourceNentries < 2 || tlh->targetNentries < 2)
+		return true;
+
+	/*
+	 * The next-to-last entry of each history carries the UUID of the
+	 * promotion that created the current timeline, so comparing those two
+	 * entries tells independent promotions apart.
+	 */
+	return matchingTimelineUUID(&tlh->source[tlh->sourceNentries - 2],
+								&tlh->target[tlh->targetNentries - 2]);
+}
+
 /*
  * Determine the TLI of the last common timeline in the timeline history of
  * two clusters.
*tliIndex is set to the index of last common timeline in @@ -936,17 +1023,30 @@ findCommonAncestorTimeline(TimeLineHistoryEntry *a_history, int a_nentries, /* * Trace the history forward, until we hit the timeline diverge. It may - * still be possible that the source and target nodes used the same - * timeline number in their history but with different start position - * depending on the history files that each node has fetched in previous - * recovery processes. Hence check the start position of the new timeline - * as well and move down by one extra timeline entry if they do not match. + * still be possible that the source and target nodes used the same timeline + * number in their history but with different start position depending on + * the history files that each node has fetched in previous recovery + * processes. Hence check the start position of the new timeline as well and + * move down by one extra timeline entry if they do not match. + * + * We also compare timeline UUIDs when both sides carry one. Two servers + * that independently promoted to the same timeline ID produce history files + * with the same name (e.g. 00000003.history); in a shared WAL archive the + * second file silently overwrites the first. pg_rewind fetches each + * server's history file directly from that server, so it sees both UUIDs. + * + * The timeline UUID stored in history entry[i] is the UUID of the promotion + * that created entry[i+1], i.e. the UUID of TLI entry[i+1].tli. So to + * check whether entry[i] itself represents the same timeline on both sides + * we look at entry[i-1].tluuid (for i > 0). TLI 1 (i == 0) is always the + * same: it is the original timeline and has no promotion UUID. 
*/ n = Min(a_nentries, b_nentries); for (i = 0; i < n; i++) { if (a_history[i].tli != b_history[i].tli || - a_history[i].begin != b_history[i].begin) + a_history[i].begin != b_history[i].begin || + (i > 0 && !matchingTimelineUUID(&a_history[i - 1], &b_history[i - 1]))) break; } diff --git a/src/bin/pg_rewind/t/005_same_timeline.pl b/src/bin/pg_rewind/t/005_same_timeline.pl index 95a40c3b270..539d05f57a1 100644 --- a/src/bin/pg_rewind/t/005_same_timeline.pl +++ b/src/bin/pg_rewind/t/005_same_timeline.pl @@ -7,6 +7,8 @@ # use strict; use warnings FATAL => 'all'; +use File::Copy; +use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; use Test::More; @@ -21,4 +23,89 @@ RewindTest::create_standby(); RewindTest::run_pg_rewind('local'); RewindTest::clean_rewind_test(); +# Test that pg_rewind detects and handles two standbys that independently +# promoted to the same timeline ID. Before the UUID-based divergence check, +# pg_rewind's same-TLI shortcut would incorrectly skip the rewind in this +# case, leaving the target's diverged WAL intact. +# +# origin (TLI 1) +# | +# +--- node_a (TLI 1) --promote--> TLI 2, UUID-A (target) +# | +# +--- node_b (TLI 1) --promote--> TLI 2, UUID-B (source) +# +# pg_rewind must detect the UUID mismatch and rewind node_a to match node_b. + +my $node_origin = PostgreSQL::Test::Cluster->new('origin'); +$node_origin->init(allows_streaming => 1); +$node_origin->append_conf('postgresql.conf', "wal_keep_size = 320MB\n"); +$node_origin->start; + +$node_origin->safe_psql('postgres', "CREATE TABLE tbl (val text)"); +$node_origin->safe_psql('postgres', "INSERT INTO tbl VALUES ('initial')"); +$node_origin->safe_psql('postgres', 'CHECKPOINT'); + +# Create node_a and node_b from separate backups of origin so that each +# has its own data directory and will generate an independent UUID on promotion. 
+my $node_a = PostgreSQL::Test::Cluster->new('node_a'); +$node_origin->backup('backup_a'); +$node_a->init_from_backup($node_origin, 'backup_a', has_streaming => 1); +$node_a->set_standby_mode(); +$node_a->start; + +my $node_b = PostgreSQL::Test::Cluster->new('node_b'); +$node_origin->backup('backup_b'); +$node_b->init_from_backup($node_origin, 'backup_b', has_streaming => 1); +$node_b->set_standby_mode(); +$node_b->start; + +# Wait for both standbys to catch up to origin, then stop origin. After +# this point the two standbys are isolated and will promote independently. +$node_origin->wait_for_catchup($node_a); +$node_origin->wait_for_catchup($node_b); +$node_origin->stop; + +# Promote both standbys. Each lands on TLI 2 but generates a distinct UUID, +# so the resulting clusters are diverged even though they share a timeline ID. +$node_a->promote; +$node_b->promote; + +# Insert a divergent row on each so the rewind has visible work to do. +$node_a->safe_psql('postgres', "INSERT INTO tbl VALUES ('in A')"); +$node_b->safe_psql('postgres', "INSERT INTO tbl VALUES ('in B')"); + +# Stop both nodes; rewind node_a (target) from node_b (source) in local mode. +$node_a->stop; +$node_b->stop; + +my $node_a_pgdata = $node_a->data_dir; +my $tmp_folder = PostgreSQL::Test::Utils::tempdir; +copy("$node_a_pgdata/postgresql.conf", + "$tmp_folder/node_a-postgresql.conf.tmp"); + +command_ok( + [ + 'pg_rewind', + '--debug', + '--source-pgdata' => $node_b->data_dir, + '--target-pgdata' => $node_a_pgdata, + '--no-sync', + '--config-file' => "$tmp_folder/node_a-postgresql.conf.tmp", + ], + 'pg_rewind handles independent same-TLI promotion'); + +move("$tmp_folder/node_a-postgresql.conf.tmp", + "$node_a_pgdata/postgresql.conf"); + +# node_a should now mirror node_b: it has 'initial' and 'in B', not 'in A'. 
+$node_a->start;
+my $result =
+  $node_a->safe_psql('postgres', "SELECT val FROM tbl ORDER BY val");
+is($result, "in B\ninitial",
+	'rewound node has source data, not its own divergent data');
+
+$node_a->teardown_node;
+$node_b->teardown_node;
+$node_origin->teardown_node;
+
 done_testing();
diff --git a/src/bin/pg_rewind/timeline.c b/src/bin/pg_rewind/timeline.c
index dda06eaa0bc..b6500606b27 100644
--- a/src/bin/pg_rewind/timeline.c
+++ b/src/bin/pg_rewind/timeline.c
@@ -9,9 +9,59 @@
  */
 #include "postgres_fe.h"
 
+#include <ctype.h>
+#include <stdlib.h>
+
 #include "access/timeline.h"
 #include "pg_rewind.h"
 
+/*
+ * Parse a UUID string in canonical dashed form
+ * (xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx) into a pg_uuid_t.
+ *
+ * Returns true on success.  On any syntax error, returns false and leaves
+ * *uuid all-zero, i.e. "unknown", so that a caller that ignores the result
+ * can never end up with a partially parsed, bogus non-zero UUID.
+ */
+static bool
+rewind_parse_uuid(const char *str, pg_uuid_t *uuid)
+{
+	const char *src = str;
+	pg_uuid_t	parsed;
+
+	memset(uuid, 0, sizeof(pg_uuid_t));
+
+	for (int i = 0; i < UUID_LEN; i++)
+	{
+		char		buf[3];
+
+		/* a dash is mandatory before bytes 4, 6, 8 and 10, and only there */
+		if (i == 4 || i == 6 || i == 8 || i == 10)
+		{
+			if (*src != '-')
+				return false;
+			src++;
+		}
+
+		/* short-circuit keeps us from reading past the terminating NUL */
+		if (!isxdigit((unsigned char) src[0]) ||
+			!isxdigit((unsigned char) src[1]))
+			return false;
+		buf[0] = src[0];
+		buf[1] = src[1];
+		buf[2] = '\0';
+		parsed.data[i] = (unsigned char) strtoul(buf, NULL, 16);
+		src += 2;
+	}
+
+	/* reject trailing garbage */
+	if (*src != '\0')
+		return false;
+
+	*uuid = parsed;
+	return true;
+}
+
 /*
  * This is copy-pasted from the backend readTimeLineHistory, modified to
  * return a malloc'd array and to work without backend functions.
@@ -48,6 +79,7 @@ rewind_parseTimeLineHistory(char *buffer, TimeLineID targetTLI, int *nentries) uint32 switchpoint_hi; uint32 switchpoint_lo; int nfields; + char uuid_str[UUID_STR_LEN + 1] = {0}; fline = bufptr; while (*bufptr && *bufptr != '\n') @@ -66,7 +98,8 @@ rewind_parseTimeLineHistory(char *buffer, TimeLineID targetTLI, int *nentries) if (*ptr == '\0' || *ptr == '#') continue; - nfields = sscanf(fline, "%u\t%X/%08X", &tli, &switchpoint_hi, &switchpoint_lo); + nfields = sscanf(fline, "%u\t%X/%08X\t%36s", &tli, &switchpoint_hi, + &switchpoint_lo, uuid_str); if (nfields < 1) { @@ -75,7 +108,7 @@ rewind_parseTimeLineHistory(char *buffer, TimeLineID targetTLI, int *nentries) pg_log_error_detail("Expected a numeric timeline ID."); exit(1); } - if (nfields != 3) + if (nfields < 3) { pg_log_error("syntax error in history file: %s", fline); pg_log_error_detail("Expected a write-ahead log switchpoint location."); @@ -99,7 +132,14 @@ rewind_parseTimeLineHistory(char *buffer, TimeLineID targetTLI, int *nentries) entry->end = ((uint64) (switchpoint_hi)) << 32 | (uint64) switchpoint_lo; prevend = entry->end; - /* we ignore the remainder of each line */ + /* + * Parse the optional UUID field. Old history files have the reason + * string in field 4; its first word is much shorter than UUID_STR_LEN + * so the length check safely distinguishes old from new format. 
+ */ + memset(&entry->tluuid, 0, sizeof(pg_uuid_t)); + if (nfields == 4 && strlen(uuid_str) == UUID_STR_LEN) + rewind_parse_uuid(uuid_str, &entry->tluuid); } if (entries && targetTLI <= lasttli) @@ -123,6 +163,7 @@ rewind_parseTimeLineHistory(char *buffer, TimeLineID targetTLI, int *nentries) entry->tli = targetTLI; entry->begin = prevend; entry->end = InvalidXLogRecPtr; + memset(&entry->tluuid, 0, sizeof(pg_uuid_t)); *nentries = nlines; return entries; diff --git a/src/include/access/timeline.h b/src/include/access/timeline.h index 97f1d619c35..cdd642c94f0 100644 --- a/src/include/access/timeline.h +++ b/src/include/access/timeline.h @@ -13,6 +13,7 @@ #include "access/xlogdefs.h" #include "nodes/pg_list.h" +#include "utils/uuid.h" /* * A list of these structs describes the timeline history of the server. Each @@ -22,9 +23,10 @@ * pointers of all the entries form a contiguous line from beginning of time * to infinity. */ -typedef struct +typedef struct TimeLineHistoryEntry { TimeLineID tli; + pg_uuid_t tluuid; /* from history file; zero if unknown */ XLogRecPtr begin; /* inclusive */ XLogRecPtr end; /* exclusive, InvalidXLogRecPtr means infinity */ } TimeLineHistoryEntry; @@ -33,6 +35,7 @@ extern List *readTimeLineHistory(TimeLineID targetTLI); extern bool existsTimeLineHistory(TimeLineID probeTLI); extern TimeLineID findNewestTimeLine(TimeLineID startTLI); extern void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, + const pg_uuid_t *newTLUUID, XLogRecPtr switchpoint, char *reason); extern void writeTimeLineHistoryFile(TimeLineID tli, char *content, int size); extern void restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end); diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 13ae3ad4fbb..8d5e374dfad 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -22,6 +22,7 @@ #include "access/xlogdefs.h" #include "access/xlogreader.h" #include "datatype/timestamp.h" 
+#include "utils/uuid.h" #include "lib/stringinfo.h" #include "pgtime.h" #include "storage/block.h" diff --git a/src/include/utils/uuid.h b/src/include/utils/uuid.h index 572d8cf4c36..784920c1f8e 100644 --- a/src/include/utils/uuid.h +++ b/src/include/utils/uuid.h @@ -17,12 +17,16 @@ /* uuid size in bytes */ #define UUID_LEN 16 +/* length of a UUID string (without null terminator): xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx */ +#define UUID_STR_LEN 36 + typedef struct pg_uuid_t { unsigned char data[UUID_LEN]; } pg_uuid_t; -/* fmgr interface macros */ +/* fmgr interface macros (backend only) */ +#ifndef FRONTEND static inline Datum UUIDPGetDatum(const pg_uuid_t *X) { @@ -38,5 +42,9 @@ DatumGetUUIDP(Datum X) } #define PG_GETARG_UUID_P(X) DatumGetUUIDP(PG_GETARG_DATUM(X)) +#endif /* !FRONTEND */ + +extern pg_uuid_t *generate_uuidv7(uint64 unix_ts_ms, uint32 sub_ms); +extern pg_uuid_t *generate_uuidv7_r(pg_uuid_t *uuid, uint64 unix_ts_ms, uint32 sub_ms); #endif /* UUID_H */ -- 2.43.0