From 73b9e5827f6e590e5c558f36ce0962f3bdecd2ad Mon Sep 17 00:00:00 2001 From: Craig Ringer Date: Tue, 23 Feb 2016 16:00:09 +0800 Subject: [PATCH 3/7] Retain extra WAL for failover slots in base backups Change the return value of pg_start_backup(), the BASE_BACKUP walsender command, etc., to report the minimum WAL required by any failover slot if this is a lower LSN than the redo position, so that base backups contain the WAL required for slots to work. Add a new backup label entry 'MIN FAILOVER SLOT LSN' that, if present, indicates the minimum LSN needed by any failover slot that is present in the base backup. Backup tools should check for this entry and ensure they retain all xlog segments from that point onward (inclusive). --- src/backend/access/transam/xlog.c | 41 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index a92f09d..9018af5 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -9797,6 +9797,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, bool backup_started_in_recovery = false; XLogRecPtr checkpointloc; XLogRecPtr startpoint; + XLogRecPtr slot_startpoint; TimeLineID starttli; pg_time_t stamp_time; char strfbuf[128]; @@ -9943,6 +9944,17 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, checkpointfpw = ControlFile->checkPointCopy.fullPageWrites; LWLockRelease(ControlFileLock); + /* + * If failover slots are in use we must retain and transfer WAL + * older than the redo location so that those slots can be replayed + * from after a failover event. + * + * This MUST be at an xlog segment boundary so truncate the LSN + * appropriately. 
+ */ + if (max_replication_slots > 0) + slot_startpoint = (ReplicationSlotsComputeRequiredLSN(true)/ XLOG_SEG_SIZE) * XLOG_SEG_SIZE; + if (backup_started_in_recovery) { XLogRecPtr recptr; @@ -10111,6 +10123,10 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, backup_started_in_recovery ? "standby" : "master"); appendStringInfo(&labelfbuf, "START TIME: %s\n", strfbuf); appendStringInfo(&labelfbuf, "LABEL: %s\n", backupidstr); + if (slot_startpoint != InvalidXLogRecPtr) + appendStringInfo(&labelfbuf, "MIN FAILOVER SLOT LSN: %X/%X\n", + (uint32)(slot_startpoint>>32), (uint32)slot_startpoint); + /* * Okay, write the file, or return its contents to caller. @@ -10204,9 +10220,34 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, /* * We're done. As a convenience, return the starting WAL location. + * + * pg_basebackup etc expect to use this as the position to start copying + * WAL from, so we should return the minimum of the slot start LSN and the + * current redo position to make sure we get all WAL required by failover + * slots. + * + * The min required LSN for failover slots is also available from the + * 'MIN FAILOVER SLOT LSN' entry in the backup label file. */ + if (slot_startpoint != InvalidXLogRecPtr && slot_startpoint < startpoint) + { + List *history; + TimeLineID slot_start_tli; + + /* Min LSN required by a slot may be on an older timeline. */ + history = readTimeLineHistory(ThisTimeLineID); + slot_start_tli = tliOfPointInHistory(slot_startpoint, history); + list_free_deep(history); + + if (slot_start_tli < starttli) + starttli = slot_start_tli; + + startpoint = slot_startpoint; + } + if (starttli_p) *starttli_p = starttli; + return startpoint; } -- 2.1.0