diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c new file mode 100644 index 2081cf8..f0927c2 *** a/src/bin/pg_rewind/parsexlog.c --- b/src/bin/pg_rewind/parsexlog.c *************** static char xlogfpath[MAXPGPATH]; *** 45,51 **** typedef struct XLogPageReadPrivate { const char *datadir; ! TimeLineID tli; } XLogPageReadPrivate; static int SimpleXLogPageRead(XLogReaderState *xlogreader, --- 45,51 ---- typedef struct XLogPageReadPrivate { const char *datadir; ! int tliIndex; } XLogPageReadPrivate; static int SimpleXLogPageRead(XLogReaderState *xlogreader, *************** static int SimpleXLogPageRead(XLogReader *** 55,65 **** /* * Read WAL from the datadir/pg_xlog, starting from 'startpoint' on timeline ! * 'tli', until 'endpoint'. Make note of the data blocks touched by the WAL ! * records, and return them in a page map. */ void ! extractPageMap(const char *datadir, XLogRecPtr startpoint, TimeLineID tli, XLogRecPtr endpoint) { XLogRecord *record; --- 55,65 ---- /* * Read WAL from the datadir/pg_xlog, starting from 'startpoint' on timeline ! * index 'tliIndex' in target timeline history, until 'endpoint'. Make note of ! * the data blocks touched by the WAL records, and return them in a page map. */ void ! extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex, XLogRecPtr endpoint) { XLogRecord *record; *************** extractPageMap(const char *datadir, XLog *** 68,74 **** XLogPageReadPrivate private; private.datadir = datadir; ! private.tli = tli; xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private); if (xlogreader == NULL) pg_fatal("out of memory\n"); --- 68,74 ---- XLogPageReadPrivate private; private.datadir = datadir; ! private.tliIndex = tliIndex; xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private); if (xlogreader == NULL) pg_fatal("out of memory\n"); *************** extractPageMap(const char *datadir, XLog *** 112,118 **** * doing anything with the record itself. */ XLogRecPtr ! readOneRecord(const char *datadir, XLogRecPtr ptr, TimeLineID tli) { XLogRecord *record; XLogReaderState *xlogreader; --- 112,118 ---- * doing anything with the record itself. */ XLogRecPtr ! readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex) { XLogRecord *record; XLogReaderState *xlogreader; *************** readOneRecord(const char *datadir, XLogR *** 121,127 **** XLogRecPtr endptr; private.datadir = datadir; ! private.tli = tli; xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private); if (xlogreader == NULL) pg_fatal("out of memory\n"); --- 121,127 ---- XLogRecPtr endptr; private.datadir = datadir; ! private.tliIndex = tliIndex; xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private); if (xlogreader == NULL) pg_fatal("out of memory\n"); *************** readOneRecord(const char *datadir, XLogR *** 152,158 **** * Find the previous checkpoint preceding given WAL position. */ void ! findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, TimeLineID tli, XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli, XLogRecPtr *lastchkptredo) { --- 152,158 ---- * Find the previous checkpoint preceding given WAL position. */ void ! findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex, XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli, XLogRecPtr *lastchkptredo) { *************** findLastCheckpoint(const char *datadir, *** 173,179 **** forkptr += (forkptr % XLogSegSize == 0) ? SizeOfXLogLongPHD : SizeOfXLogShortPHD; private.datadir = datadir; ! private.tli = tli; xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private); if (xlogreader == NULL) pg_fatal("out of memory\n"); --- 173,179 ---- forkptr += (forkptr % XLogSegSize == 0) ? SizeOfXLogLongPHD : SizeOfXLogShortPHD; private.datadir = datadir; ! private.tliIndex = tliIndex; xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private); if (xlogreader == NULL) pg_fatal("out of memory\n"); *************** SimpleXLogPageRead(XLogReaderState *xlog *** 236,244 **** { XLogPageReadPrivate *private = (XLogPageReadPrivate *) xlogreader->private_data; uint32 targetPageOff; ! XLogSegNo targetSegNo PG_USED_FOR_ASSERTS_ONLY; XLByteToSeg(targetPagePtr, targetSegNo); targetPageOff = targetPagePtr % XLogSegSize; /* --- 236,246 ---- { XLogPageReadPrivate *private = (XLogPageReadPrivate *) xlogreader->private_data; uint32 targetPageOff; ! XLogRecPtr targetSegEnd; ! XLogSegNo targetSegNo; XLByteToSeg(targetPagePtr, targetSegNo); + XLogSegNoOffsetToRecPtr(targetSegNo + 1, 0, targetSegEnd); targetPageOff = targetPagePtr % XLogSegSize; /* *************** SimpleXLogPageRead(XLogReaderState *xlog *** 257,263 **** { char xlogfname[MAXFNAMELEN]; ! XLogFileName(xlogfname, private->tli, xlogreadsegno); snprintf(xlogfpath, MAXPGPATH, "%s/" XLOGDIR "/%s", private->datadir, xlogfname); --- 259,278 ---- { char xlogfname[MAXFNAMELEN]; ! /* ! * Since incomplete segments are copied into next timelines, find the ! * lastest timeline holding required segment. Assuming we can move ! * in xlog both forward and backward, consider also switching timeline ! * back. ! */ ! while (private->tliIndex < targetNentries - 1 && ! targetHistory[private->tliIndex].end < targetSegEnd) ! private->tliIndex++; ! while (private->tliIndex > 0 && ! targetHistory[private->tliIndex].begin >= targetSegEnd) ! private->tliIndex--; ! ! XLogFileName(xlogfname, targetHistory[private->tliIndex].tli, xlogreadsegno); snprintf(xlogfpath, MAXPGPATH, "%s/" XLOGDIR "/%s", private->datadir, xlogfname); *************** SimpleXLogPageRead(XLogReaderState *xlog *** 293,299 **** Assert(targetSegNo == xlogreadsegno); ! *pageTLI = private->tli; return XLOG_BLCKSZ; } --- 308,314 ---- Assert(targetSegNo == xlogreadsegno); ! *pageTLI = targetHistory[private->tliIndex].tli; return XLOG_BLCKSZ; } diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c new file mode 100644 index 032301f..2fe3ec5 *** a/src/bin/pg_rewind/pg_rewind.c --- b/src/bin/pg_rewind/pg_rewind.c *************** static void digestControlFile(ControlFil *** 37,43 **** size_t size); static void updateControlFile(ControlFileData *ControlFile); static void sanityChecks(void); ! static void findCommonAncestorTimeline(XLogRecPtr *recptr, TimeLineID *tli); static ControlFileData ControlFile_target; static ControlFileData ControlFile_source; --- 37,43 ---- size_t size); static void updateControlFile(ControlFileData *ControlFile); static void sanityChecks(void); ! static void findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex); static ControlFileData ControlFile_target; static ControlFileData ControlFile_source; *************** bool debug = false; *** 53,58 **** --- 53,62 ---- bool showprogress = false; bool dry_run = false; + /* Target history */ + TimeLineHistoryEntry *targetHistory; + int targetNentries; + static void usage(const char *progname) { *************** main(int argc, char **argv) *** 88,94 **** int option_index; int c; XLogRecPtr divergerec; ! TimeLineID lastcommontli; XLogRecPtr chkptrec; TimeLineID chkpttli; XLogRecPtr chkptredo; --- 92,98 ---- int option_index; int c; XLogRecPtr divergerec; ! int lastcommontliIndex; XLogRecPtr chkptrec; TimeLineID chkpttli; XLogRecPtr chkptredo; *************** main(int argc, char **argv) *** 214,222 **** if (ControlFile_target.checkPointCopy.ThisTimeLineID == ControlFile_source.checkPointCopy.ThisTimeLineID) pg_fatal("source and target cluster are on the same timeline\n"); ! findCommonAncestorTimeline(&divergerec, &lastcommontli); printf(_("The servers diverged at WAL position %X/%X on timeline %u.\n"), ! (uint32) (divergerec >> 32), (uint32) divergerec, lastcommontli); /* * Check for the possibility that the target is in fact a direct ancestor --- 218,227 ---- if (ControlFile_target.checkPointCopy.ThisTimeLineID == ControlFile_source.checkPointCopy.ThisTimeLineID) pg_fatal("source and target cluster are on the same timeline\n"); ! findCommonAncestorTimeline(&divergerec, &lastcommontliIndex); printf(_("The servers diverged at WAL position %X/%X on timeline %u.\n"), ! (uint32) (divergerec >> 32), (uint32) divergerec, ! targetHistory[lastcommontliIndex].tli); /* * Check for the possibility that the target is in fact a direct ancestor *************** main(int argc, char **argv) *** 234,240 **** /* Read the checkpoint record on the target to see where it ends. */ chkptendrec = readOneRecord(datadir_target, ControlFile_target.checkPoint, ! ControlFile_target.checkPointCopy.ThisTimeLineID); /* * If the histories diverged exactly at the end of the shutdown --- 239,245 ---- /* Read the checkpoint record on the target to see where it ends. */ chkptendrec = readOneRecord(datadir_target, ControlFile_target.checkPoint, ! targetNentries - 1); /* * If the histories diverged exactly at the end of the shutdown *************** main(int argc, char **argv) *** 254,260 **** exit(0); } ! findLastCheckpoint(datadir_target, divergerec, lastcommontli, &chkptrec, &chkpttli, &chkptredo); printf(_("Rewinding from last common checkpoint at %X/%X on timeline %u\n"), (uint32) (chkptrec >> 32), (uint32) chkptrec, --- 259,266 ---- exit(0); } ! findLastCheckpoint(datadir_target, divergerec, ! lastcommontliIndex, &chkptrec, &chkpttli, &chkptredo); printf(_("Rewinding from last common checkpoint at %X/%X on timeline %u\n"), (uint32) (chkptrec >> 32), (uint32) chkptrec, *************** main(int argc, char **argv) *** 277,283 **** * we would need to replay until the end of WAL here. */ pg_log(PG_PROGRESS, "reading WAL in target\n"); ! extractPageMap(datadir_target, chkptrec, lastcommontli, ControlFile_target.checkPoint); filemap_finalize(); --- 283,289 ---- * we would need to replay until the end of WAL here. */ pg_log(PG_PROGRESS, "reading WAL in target\n"); ! extractPageMap(datadir_target, chkptrec, lastcommontliIndex, ControlFile_target.checkPoint); filemap_finalize(); *************** sanityChecks(void) *** 385,397 **** * server is shut down. There isn't any very strong reason for this * limitation, but better safe than sorry. */ ! if (datadir_source && ControlFile_source.state != DB_SHUTDOWNED) pg_fatal("source data directory must be shut down cleanly\n"); } /* * Determine the TLI of the last common timeline in the histories of the two ! * clusters. *tli is set to the last common timeline, and *recptr is set to * the position where the histories diverged (ie. the first WAL record that's * not the same in both clusters). * --- 391,421 ---- * server is shut down. There isn't any very strong reason for this * limitation, but better safe than sorry. */ ! if (datadir_source && ControlFile_source.state != DB_SHUTDOWNED ! && ControlFile_source.state != DB_SHUTDOWNED_IN_RECOVERY) pg_fatal("source data directory must be shut down cleanly\n"); } /* + * Find minimum from two xlog pointers assuming invalid pointer (0) means + * infinity as timeline.h states. + */ + static XLogRecPtr + xlPtrMin(XLogRecPtr a, XLogRecPtr b) + { + if (XLogRecPtrIsInvalid(a)) + return b; + else if (XLogRecPtrIsInvalid(b)) + return a; + else + return Min(a, b); + } + + /* * Determine the TLI of the last common timeline in the histories of the two ! * clusters. targetHistory is filled with target timeline history and ! * targetNentries is number of items in targetHistory. *tliIndex is set to the ! * index of last common timeline in targetHistory array, and *recptr is set to * the position where the histories diverged (ie. the first WAL record that's * not the same in both clusters). * *************** sanityChecks(void) *** 399,411 **** * before calling this. */ static void ! findCommonAncestorTimeline(XLogRecPtr *recptr, TimeLineID *tli) { TimeLineID targettli; TimeLineHistoryEntry *sourceHistory; ! int nentries; ! int i; ! TimeLineID sourcetli; targettli = ControlFile_target.checkPointCopy.ThisTimeLineID; sourcetli = ControlFile_source.checkPointCopy.ThisTimeLineID; --- 423,435 ---- * before calling this. */ static void ! findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex) { + TimeLineID sourcetli; TimeLineID targettli; TimeLineHistoryEntry *sourceHistory; ! int sourceNentries; ! int i, n; targettli = ControlFile_target.checkPointCopy.ThisTimeLineID; sourcetli = ControlFile_source.checkPointCopy.ThisTimeLineID; *************** findCommonAncestorTimeline(XLogRecPtr *r *** 416,422 **** sourceHistory = (TimeLineHistoryEntry *) pg_malloc(sizeof(TimeLineHistoryEntry)); sourceHistory->tli = sourcetli; sourceHistory->begin = sourceHistory->end = InvalidXLogRecPtr; ! nentries = 1; } else { --- 440,446 ---- sourceHistory = (TimeLineHistoryEntry *) pg_malloc(sizeof(TimeLineHistoryEntry)); sourceHistory->tli = sourcetli; sourceHistory->begin = sourceHistory->end = InvalidXLogRecPtr; ! sourceNentries = 1; } else { *************** findCommonAncestorTimeline(XLogRecPtr *r *** 428,459 **** sourceHistory = rewind_parseTimeLineHistory(histfile, ControlFile_source.checkPointCopy.ThisTimeLineID, ! &nentries); pg_free(histfile); } ! /* ! * Trace the history backwards, until we hit the target timeline. ! * ! * TODO: This assumes that there are no timeline switches on the target ! * cluster after the fork. ! */ ! for (i = nentries - 1; i >= 0; i--) { ! TimeLineHistoryEntry *entry = &sourceHistory[i]; ! if (entry->tli == targettli) { ! /* found it */ ! *recptr = entry->end; ! *tli = entry->tli; ! pg_free(sourceHistory); ! return; } } ! pg_fatal("could not find common ancestor of the source and target cluster's timelines\n"); } --- 452,524 ---- sourceHistory = rewind_parseTimeLineHistory(histfile, ControlFile_source.checkPointCopy.ThisTimeLineID, ! &sourceNentries); pg_free(histfile); } ! if (targettli == 1) { ! targetHistory = (TimeLineHistoryEntry *) pg_malloc(sizeof(TimeLineHistoryEntry)); ! targetHistory->tli = targettli; ! targetHistory->begin = targetHistory->end = InvalidXLogRecPtr; ! targetNentries = 1; ! } ! else ! { ! char path[MAXPGPATH]; ! char *histfile; ! ! TLHistoryFilePath(path, targettli); ! histfile = slurpFile(datadir_target, path, NULL); ! ! targetHistory = rewind_parseTimeLineHistory(histfile, ! ControlFile_target.checkPointCopy.ThisTimeLineID, ! &targetNentries); ! pg_free(histfile); ! } ! ! if (debug) ! { ! printf("Target timeline history:\n"); ! /* ! * Print the target timeline history. ! */ ! for (i = 0; i < targetNentries; i++) { ! TimeLineHistoryEntry *targetEntry; ! targetEntry = &targetHistory[i]; ! printf("%d: %X/%X - %X/%X\n", targetEntry->tli, ! (uint32) (targetEntry->begin >> 32), (uint32) (targetEntry->begin), ! (uint32) (targetEntry->end >> 32), (uint32) (targetEntry->end)); } } ! /* ! * Trace the history forward, until we hit the timeline diverge. ! */ ! n = Min(sourceNentries, targetNentries); ! for (i = 0; i < n; i++) ! { ! if (sourceHistory[i].tli != targetHistory[i].tli ! || sourceHistory[i].begin != targetHistory[i].begin) ! break; ! } ! ! if (i > 0) ! { ! i--; ! *recptr = xlPtrMin(sourceHistory[i].end, targetHistory[i].end); ! *tliIndex = i; ! ! pg_free(sourceHistory); ! return; ! } ! else ! { ! pg_fatal("could not find common ancestor of the source and target cluster's timelines\n"); ! } } diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h new file mode 100644 index e281369..4826dde *** a/src/bin/pg_rewind/pg_rewind.h --- b/src/bin/pg_rewind/pg_rewind.h *************** extern bool debug; *** 27,41 **** extern bool showprogress; extern bool dry_run; /* in parsexlog.c */ extern void extractPageMap(const char *datadir, XLogRecPtr startpoint, ! TimeLineID tli, XLogRecPtr endpoint); extern void findLastCheckpoint(const char *datadir, XLogRecPtr searchptr, ! TimeLineID tli, XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli, XLogRecPtr *lastchkptredo); extern XLogRecPtr readOneRecord(const char *datadir, XLogRecPtr ptr, ! TimeLineID tli); /* in timeline.c */ extern TimeLineHistoryEntry *rewind_parseTimeLineHistory(char *buffer, --- 27,45 ---- extern bool showprogress; extern bool dry_run; + /* Target history */ + extern TimeLineHistoryEntry *targetHistory; + extern int targetNentries; + /* in parsexlog.c */ extern void extractPageMap(const char *datadir, XLogRecPtr startpoint, ! int tliIndex, XLogRecPtr endpoint); extern void findLastCheckpoint(const char *datadir, XLogRecPtr searchptr, ! int tliIndex, XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli, XLogRecPtr *lastchkptredo); extern XLogRecPtr readOneRecord(const char *datadir, XLogRecPtr ptr, ! int tliIndex); /* in timeline.c */ extern TimeLineHistoryEntry *rewind_parseTimeLineHistory(char *buffer,