From a5f48cabe5e2de4b38b099c9ccbeb35c093ba4f8 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Tue, 31 Mar 2020 11:50:05 -0400 Subject: [PATCH v16 4/4] WIP: Store WAL ranges in manifest and validate them w/pg_waldump. --- src/backend/replication/basebackup.c | 109 ++++++- src/bin/pg_validatebackup/parse_manifest.c | 270 ++++++++++++++---- src/bin/pg_validatebackup/parse_manifest.h | 5 + src/bin/pg_validatebackup/pg_validatebackup.c | 183 ++++++++++-- .../pg_validatebackup/t/005_bad_manifest.pl | 9 +- 5 files changed, 492 insertions(+), 84 deletions(-) diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index deaa4f1c34..f56d2c97b5 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -16,6 +16,7 @@ #include #include +#include "access/timeline.h" #include "access/xlog_internal.h" /* for pg_start/stop_backup */ #include "catalog/pg_type.h" #include "common/checksum_helper.h" @@ -99,6 +100,9 @@ static void AppendStringToManifest(manifest_info *manifest, char *s); static void AddFileToManifest(manifest_info *manifest, const char *spcoid, const char *pathname, size_t size, time_t mtime, pg_checksum_context *checksum_ctx); +static void AddWALInfoToManifest(manifest_info *manifest, XLogRecPtr startptr, + TimeLineID starttli, XLogRecPtr endptr, + TimeLineID endtli); static void SendBackupManifest(manifest_info *manifest); static void perform_base_backup(basebackup_options *opt); static void parse_basebackup_options(List *options, basebackup_options *opt); @@ -740,6 +744,8 @@ perform_base_backup(basebackup_options *opt) pq_putemptymessage('c'); } + AddWALInfoToManifest(&manifest, startptr, starttli, endptr, endtli); + SendBackupManifest(&manifest); SendXlogRecPtrResult(endptr, endtli); @@ -1209,6 +1215,101 @@ AddFileToManifest(manifest_info *manifest, const char *spcoid, pfree(buf.data); } +/* + * Add information about the WAL that will need to be replayed when restoring + * this backup to the 
manifest. + */ +static void +AddWALInfoToManifest(manifest_info *manifest, XLogRecPtr startptr, + TimeLineID starttli, XLogRecPtr endptr, TimeLineID endtli) +{ + List *timelines; + ListCell *lc; + bool first_wal_range = true; + bool found_start_timeline = false; + + /* If there is no buffile, then the user doesn't want a manifest. */ + if (manifest->buffile == NULL) + return; + + /* Terminate the list of files and start a list of LSN ranges. */ + AppendStringToManifest(manifest, "\n],\n\"WAL-Ranges\": [\n"); + + /* Read the timeline history only now that we know a manifest is wanted. */ + timelines = readTimeLineHistory(endtli); + + foreach (lc, timelines) + { + TimeLineHistoryEntry *entry = lfirst(lc); + XLogRecPtr tl_beginptr; + + /* + * We only care about timelines that were active during the backup. + * Skip any that ended before the backup started. (Note that if + * entry->end is InvalidXLogRecPtr, it means that the timeline has not + * yet ended.) + */ + if (!XLogRecPtrIsInvalid(entry->end) && entry->end < startptr) + continue; + + /* + * Because readTimeLineHistory() returns newer timelines before older + * ones, the first timeline we encounter that is new enough to matter + * ought to match the ending timeline of the backup. + */ + if (first_wal_range && endtli != entry->tli) + ereport(ERROR, + errmsg("end timeline %u does not match timeline history", + endtli)); + + if (starttli == entry->tli) + tl_beginptr = startptr; + else + { + tl_beginptr = entry->begin; + + /* + * A newer timeline is needed from its very beginning. If it has + * no valid beginning LSN, it has no ancestors, so we can never + * reach the expected starting TLI from here. Something's gone + * horribly wrong. + */ + if (XLogRecPtrIsInvalid(entry->begin)) + ereport(ERROR, + errmsg("start timeline %u does not match timeline history", + starttli)); + } + + AppendToManifest(manifest, + "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%X\", \"End-LSN\": \"%X/%X\" }", + first_wal_range ? 
"" : ",\n", + entry->tli, + (uint32) (tl_beginptr >> 32), (uint32) tl_beginptr, + (uint32) (endptr >> 32), (uint32) endptr); + + endptr = entry->begin; + if (starttli == entry->tli) + { + found_start_timeline = true; + break; + } + + first_wal_range = false; + } + + /* + * The history of the ending timeline must contain the starting timeline; + * otherwise the two are unrelated. Verify that we actually found it. + */ + if (!found_start_timeline) + ereport(ERROR, + errmsg("starting timeline %u not found in timeline history", + starttli)); + + /* Terminate the list of WAL ranges. */ + AppendStringToManifest(manifest, "\n],\n"); +} + /* * Finalize the backup manifest, and send it to the client. */ @@ -1220,16 +1321,10 @@ SendBackupManifest(manifest_info *manifest) char checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH]; size_t manifest_bytes_done = 0; - /* - * If there is no buffile, then the user doesn't want a manifest, so - * don't waste any time generating one. - */ + /* If there is no buffile, then the user doesn't want a manifest. */ if (manifest->buffile == NULL) return; - /* Terminate the list of files. */ - AppendStringToManifest(manifest, "],\n"); - /* * Append manifest checksum, so that the problems with the manifest itself * can be detected. 
diff --git a/src/bin/pg_validatebackup/parse_manifest.c b/src/bin/pg_validatebackup/parse_manifest.c index e6b42adfda..461ac36b76 100644 --- a/src/bin/pg_validatebackup/parse_manifest.c +++ b/src/bin/pg_validatebackup/parse_manifest.c @@ -23,14 +23,16 @@ typedef enum { JM_EXPECT_TOPLEVEL_START, JM_EXPECT_TOPLEVEL_END, - JM_EXPECT_VERSION_FIELD, + JM_EXPECT_TOPLEVEL_FIELD, JM_EXPECT_VERSION_VALUE, - JM_EXPECT_FILES_FIELD, - JM_EXPECT_FILES_ARRAY_START, - JM_EXPECT_FILES_ARRAY_NEXT, + JM_EXPECT_FILES_START, + JM_EXPECT_FILES_NEXT, JM_EXPECT_THIS_FILE_FIELD, JM_EXPECT_THIS_FILE_VALUE, - JM_EXPECT_MANIFEST_CHECKSUM_FIELD, + JM_EXPECT_WAL_RANGES_START, + JM_EXPECT_WAL_RANGES_NEXT, + JM_EXPECT_THIS_WAL_RANGE_FIELD, + JM_EXPECT_THIS_WAL_RANGE_VALUE, JM_EXPECT_MANIFEST_CHECKSUM_VALUE, JM_EXPECT_EOF } JsonManifestSemanticState; @@ -48,6 +50,16 @@ typedef enum JMFF_CHECKSUM } JsonManifestFileField; +/* + * Possible fields for one WAL range as described by the manifest. + */ +typedef enum +{ + JMWRF_TIMELINE, + JMWRF_START_LSN, + JMWRF_END_LSN +} JsonManifestWALRangeField; + /* * Internal state used while decoding the JSON-format backup manifest. */ @@ -55,13 +67,24 @@ typedef struct { JsonManifestParseContext *context; JsonManifestSemanticState state; - JsonManifestFileField field; + + /* These fields are used for parsing objects in the list of files. */ + JsonManifestFileField file_field; char *pathname; char *encoded_pathname; char *size; char *algorithm; pg_checksum_type checksum_algorithm; char *checksum; + + /* These fields are used for parsing objects in the list of WAL ranges. */ + JsonManifestWALRangeField wal_range_field; + char *timeline; + char *start_lsn; + char *end_lsn; + + /* Miscellaneous other stuff. 
*/ + bool saw_version_field; char *manifest_checksum; } JsonManifestParseState; @@ -74,6 +97,7 @@ static void json_manifest_object_field_start(void *state, char *fname, static void json_manifest_scalar(void *state, char *token, JsonTokenType tokentype); static void json_manifest_finalize_file(JsonManifestParseState *parse); +static void json_manifest_finalize_wal_range(JsonManifestParseState *parse); static void verify_manifest_checksum(JsonManifestParseState *parse, char *buffer, size_t size); static void json_manifest_parse_failure(JsonManifestParseContext *context, @@ -81,6 +105,7 @@ static void json_manifest_parse_failure(JsonManifestParseContext *context, static int hexdecode_char(char c); static bool hexdecode_string(uint8 *result, char *input, int nbytes); +static bool parse_xlogrecptr(XLogRecPtr *result, char *input); /* * Main entrypoint to parse a JSON-format backup manifest. @@ -100,8 +125,9 @@ json_parse_manifest(JsonManifestParseContext *context, char *buffer, JsonManifestParseState parse; /* Set up our private parsing context. */ - parse.state = JM_EXPECT_TOPLEVEL_START; parse.context = context; + parse.state = JM_EXPECT_TOPLEVEL_START; + parse.saw_version_field = false; /* Create a JSON lexing context. */ lex = makeJsonLexContextCstringLen(buffer, size, PG_UTF8, true); @@ -132,11 +158,9 @@ json_parse_manifest(JsonManifestParseContext *context, char *buffer, /* * Invoked at the start of each object in the JSON document. * - * The document as a whole is expected to be an object with three keys - * (PostgreSQL-Backup-Manifest-Version, Files, Manifest-Checksum) and each - * file is expected to be an object with various keys (Path, Size, etc.). - * If we're not at the beginning of either the toplevel object or the object - * for a particular file, it's an error. + * The document as a whole is expected to be an object; each file and each + * WAL range is also expected to be an object. If we're anywhere else in the + * document, it's an error. 
*/ static void json_manifest_object_start(void *state) @@ -146,9 +170,9 @@ json_manifest_object_start(void *state) switch (parse->state) { case JM_EXPECT_TOPLEVEL_START: - parse->state = JM_EXPECT_VERSION_FIELD; + parse->state = JM_EXPECT_TOPLEVEL_FIELD; break; - case JM_EXPECT_FILES_ARRAY_NEXT: + case JM_EXPECT_FILES_NEXT: parse->state = JM_EXPECT_THIS_FILE_FIELD; parse->pathname = NULL; parse->encoded_pathname = NULL; @@ -156,6 +180,12 @@ json_manifest_object_start(void *state) parse->algorithm = NULL; parse->checksum = NULL; break; + case JM_EXPECT_WAL_RANGES_NEXT: + parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD; + parse->timeline = NULL; + parse->start_lsn = NULL; + parse->end_lsn = NULL; + break; default: json_manifest_parse_failure(parse->context, "unexpected object start"); @@ -168,8 +198,8 @@ json_manifest_object_start(void *state) * * The possible cases here are the same as for json_manifest_object_start. * There's nothing special to do at the end of the document, but when we - * reach the end of an object representing a particular file, we must call - * json_manifest_finalize_file() to save the associated details. + * reach the end of an object representing a particular file or WAL range, + * we must call the matching finalize function to save the associated details. */ static void json_manifest_object_end(void *state) @@ -183,7 +213,11 @@ json_manifest_object_end(void *state) break; case JM_EXPECT_THIS_FILE_FIELD: json_manifest_finalize_file(parse); - parse->state = JM_EXPECT_FILES_ARRAY_NEXT; + parse->state = JM_EXPECT_FILES_NEXT; + break; + case JM_EXPECT_THIS_WAL_RANGE_FIELD: + json_manifest_finalize_wal_range(parse); + parse->state = JM_EXPECT_WAL_RANGES_NEXT; break; default: json_manifest_parse_failure(parse->context, @@ -196,7 +230,8 @@ json_manifest_object_end(void *state) * Invoked at the start of each array in the JSON document. * * Within the toplevel object, the value associated with the "Files" key - * should be an array. 
No other arrays are expected. + * should be an array. Similarly for the "WAL-Ranges" key. No other arrays + * are expected. */ static void json_manifest_array_start(void *state) @@ -205,8 +240,11 @@ json_manifest_array_start(void *state) switch (parse->state) { - case JM_EXPECT_FILES_ARRAY_START: - parse->state = JM_EXPECT_FILES_ARRAY_NEXT; + case JM_EXPECT_FILES_START: + parse->state = JM_EXPECT_FILES_NEXT; + break; + case JM_EXPECT_WAL_RANGES_START: + parse->state = JM_EXPECT_WAL_RANGES_NEXT; break; default: json_manifest_parse_failure(parse->context, @@ -218,8 +256,7 @@ json_manifest_array_start(void *state) /* * Invoked at the end of each array in the JSON document. * - * Just like json_manifest_array_start, there's only one expected case - * here. + * The cases here are analogous to those in json_manifest_array_start. */ static void json_manifest_array_end(void *state) @@ -228,8 +265,9 @@ json_manifest_array_end(void *state) switch (parse->state) { - case JM_EXPECT_FILES_ARRAY_NEXT: - parse->state = JM_EXPECT_MANIFEST_CHECKSUM_FIELD; + case JM_EXPECT_FILES_NEXT: + case JM_EXPECT_WAL_RANGES_NEXT: + parse->state = JM_EXPECT_TOPLEVEL_FIELD; break; default: json_manifest_parse_failure(parse->context, @@ -248,46 +286,81 @@ json_manifest_object_field_start(void *state, char *fname, bool isnull) switch (parse->state) { - case JM_EXPECT_VERSION_FIELD: - /* Inside toplevel object, expecting version indicator. */ - if (strcmp(fname, "PostgreSQL-Backup-Manifest-Version") != 0) - json_manifest_parse_failure(parse->context, - "expected version indicator"); - parse->state = JM_EXPECT_VERSION_VALUE; - break; - case JM_EXPECT_FILES_FIELD: - /* Inside toplevel object, expecting "Files" next. */ - if (strcmp(fname, "Files") != 0) - json_manifest_parse_failure(parse->context, - "expected file list"); - parse->state = JM_EXPECT_FILES_ARRAY_START; + case JM_EXPECT_TOPLEVEL_FIELD: + /* + * Inside toplevel object. The version indicator should always + * be the first field. 
+ */ + if (!parse->saw_version_field) + { + if (strcmp(fname, "PostgreSQL-Backup-Manifest-Version") != 0) + json_manifest_parse_failure(parse->context, + "expected version indicator"); + parse->state = JM_EXPECT_VERSION_VALUE; + parse->saw_version_field = true; + break; + } + + /* Is this the list of files? */ + if (strcmp(fname, "Files") == 0) + { + parse->state = JM_EXPECT_FILES_START; + break; + } + + /* Is this the list of WAL ranges? */ + if (strcmp(fname, "WAL-Ranges") == 0) + { + parse->state = JM_EXPECT_WAL_RANGES_START; + break; + } + + /* Is this the manifest checksum? */ + if (strcmp(fname, "Manifest-Checksum") == 0) + { + parse->state = JM_EXPECT_MANIFEST_CHECKSUM_VALUE; + break; + } + + /* It's not a field we recognize. */ + json_manifest_parse_failure(parse->context, + "unknown toplevel field"); break; + case JM_EXPECT_THIS_FILE_FIELD: /* Inside object for one file; which key have we got? */ if (strcmp(fname, "Path") == 0) - parse->field = JMFF_PATH; + parse->file_field = JMFF_PATH; else if (strcmp(fname, "Encoded-Path") == 0) - parse->field = JMFF_ENCODED_PATH; + parse->file_field = JMFF_ENCODED_PATH; else if (strcmp(fname, "Size") == 0) - parse->field = JMFF_SIZE; + parse->file_field = JMFF_SIZE; else if (strcmp(fname, "Last-Modified") == 0) - parse->field = JMFF_LAST_MODIFIED; + parse->file_field = JMFF_LAST_MODIFIED; else if (strcmp(fname, "Checksum-Algorithm") == 0) - parse->field = JMFF_CHECKSUM_ALGORITHM; + parse->file_field = JMFF_CHECKSUM_ALGORITHM; else if (strcmp(fname, "Checksum") == 0) - parse->field = JMFF_CHECKSUM; + parse->file_field = JMFF_CHECKSUM; else json_manifest_parse_failure(parse->context, "unexpected file field"); parse->state = JM_EXPECT_THIS_FILE_VALUE; break; - case JM_EXPECT_MANIFEST_CHECKSUM_FIELD: - /* Inside toplevel object, expecting "Manifest-Checksum" next. 
*/ - if (strcmp(fname, "Manifest-Checksum") != 0) + + case JM_EXPECT_THIS_WAL_RANGE_FIELD: + /* Inside object for one WAL range; which key have we got? */ + if (strcmp(fname, "Timeline") == 0) + parse->wal_range_field = JMWRF_TIMELINE; + else if (strcmp(fname, "Start-LSN") == 0) + parse->wal_range_field = JMWRF_START_LSN; + else if (strcmp(fname, "End-LSN") == 0) + parse->wal_range_field = JMWRF_END_LSN; + else json_manifest_parse_failure(parse->context, - "expected manifest checksum"); - parse->state = JM_EXPECT_MANIFEST_CHECKSUM_VALUE; + "unexpected wal range field"); + parse->state = JM_EXPECT_THIS_WAL_RANGE_VALUE; break; + default: json_manifest_parse_failure(parse->context, "unexpected object field"); @@ -300,9 +374,9 @@ json_manifest_object_field_start(void *state, char *fname, bool isnull) * * Object field names don't reach this code; those are handled by * json_manifest_object_field_start. When we're inside of the object for - * a particular file, that function will have noticed the name of the field, - * and we'll get the corresponding value here. When we're in the toplevel - * object, the parse state itself tells us which field this is. + * a particular file or WAL range, that function will have noticed the name + * of the field, and we'll get the corresponding value here. When we're in + * the toplevel object, the parse state itself tells us which field this is. 
* * In all cases except for PostgreSQL-Backup-Manifest-Version, which we * can just check on the spot, the goal here is just to save the value in @@ -321,10 +395,11 @@ json_manifest_scalar(void *state, char *token, JsonTokenType tokentype) if (strcmp(token, "1") != 0) json_manifest_parse_failure(parse->context, "unexpected manifest version"); - parse->state = JM_EXPECT_FILES_FIELD; + parse->state = JM_EXPECT_TOPLEVEL_FIELD; break; + case JM_EXPECT_THIS_FILE_VALUE: - switch (parse->field) + switch (parse->file_field) { case JMFF_PATH: parse->pathname = token; @@ -347,10 +422,28 @@ json_manifest_scalar(void *state, char *token, JsonTokenType tokentype) } parse->state = JM_EXPECT_THIS_FILE_FIELD; break; + + case JM_EXPECT_THIS_WAL_RANGE_VALUE: + switch (parse->wal_range_field) + { + case JMWRF_TIMELINE: + parse->timeline = token; + break; + case JMWRF_START_LSN: + parse->start_lsn = token; + break; + case JMWRF_END_LSN: + parse->end_lsn = token; + break; + } + parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD; + break; + case JM_EXPECT_MANIFEST_CHECKSUM_VALUE: parse->state = JM_EXPECT_TOPLEVEL_END; parse->manifest_checksum = token; break; + default: json_manifest_parse_failure(parse->context, "unexpected scalar"); break; @@ -459,6 +552,62 @@ json_manifest_finalize_file(JsonManifestParseState *parse) } } +/* + * Do additional parsing and sanity-checking of the details gathered for one + * WAL range, and invoke the per-WAL-range callback so that the caller gets + * those details. This happens for each WAL range when the corresponding JSON + * object is completely parsed. + */ +static void +json_manifest_finalize_wal_range(JsonManifestParseState *parse) +{ + JsonManifestParseContext *context = parse->context; + TimeLineID tli; + XLogRecPtr start_lsn, + end_lsn; + char *ep; + + /* Make sure all fields are present. 
*/ + if (parse->timeline == NULL) + json_manifest_parse_failure(parse->context, "missing timeline"); + if (parse->start_lsn == NULL) + json_manifest_parse_failure(parse->context, "missing start LSN"); + if (parse->end_lsn == NULL) + json_manifest_parse_failure(parse->context, "missing end LSN"); + + /* Parse timeline. */ + tli = strtoul(parse->timeline, &ep, 10); + if (*ep) + json_manifest_parse_failure(parse->context, + "timeline is not an integer"); + if (!parse_xlogrecptr(&start_lsn, parse->start_lsn)) + json_manifest_parse_failure(parse->context, + "unable to parse start LSN"); + if (!parse_xlogrecptr(&end_lsn, parse->end_lsn)) + json_manifest_parse_failure(parse->context, + "unable to parse end LSN"); + + /* Invoke the callback with the details we've gathered. */ + context->perwalrange_cb(context, tli, start_lsn, end_lsn); + + /* Free memory we no longer need. */ + if (parse->timeline != NULL) + { + pfree(parse->timeline); + parse->timeline = NULL; + } + if (parse->start_lsn != NULL) + { + pfree(parse->start_lsn); + parse->start_lsn = NULL; + } + if (parse->end_lsn != NULL) + { + pfree(parse->end_lsn); + parse->end_lsn = NULL; + } +} + /* * Verify that the manifest checksum is correct. * @@ -574,3 +723,18 @@ hexdecode_string(uint8 *result, char *input, int nbytes) return true; } + +/* + * Parse an XLogRecPtr expressed using the usual string format. 
+ */ +static bool +parse_xlogrecptr(XLogRecPtr *result, char *input) +{ + uint32 hi; + uint32 lo; + + if (sscanf(input, "%X/%X", &hi, &lo) != 2) + return false; + *result = ((uint64) hi) << 32 | lo; + return true; +} diff --git a/src/bin/pg_validatebackup/parse_manifest.h b/src/bin/pg_validatebackup/parse_manifest.h index 25d140f72f..f0a4fac36b 100644 --- a/src/bin/pg_validatebackup/parse_manifest.h +++ b/src/bin/pg_validatebackup/parse_manifest.h @@ -14,6 +14,7 @@ #ifndef PARSE_MANIFEST_H #define PARSE_MANIFEST_H +#include "access/xlogdefs.h" #include "common/checksum_helper.h" #include "mb/pg_wchar.h" @@ -24,6 +25,9 @@ typedef void (*json_manifest_perfile_callback)(JsonManifestParseContext *, char *pathname, size_t size, pg_checksum_type checksum_type, int checksum_length, uint8 *checksum_payload); +typedef void (*json_manifest_perwalrange_callback)(JsonManifestParseContext *, + TimeLineID tli, + XLogRecPtr start_lsn, XLogRecPtr end_lsn); typedef void (*json_manifest_error_callback)(JsonManifestParseContext *, char *fmt, ...) pg_attribute_printf(2, 3); @@ -31,6 +35,7 @@ struct JsonManifestParseContext { void *private_data; json_manifest_perfile_callback perfile_cb; + json_manifest_perwalrange_callback perwalrange_cb; json_manifest_error_callback error_cb; }; diff --git a/src/bin/pg_validatebackup/pg_validatebackup.c b/src/bin/pg_validatebackup/pg_validatebackup.c index eb1473d9d0..2c9d06a3a1 100644 --- a/src/bin/pg_validatebackup/pg_validatebackup.c +++ b/src/bin/pg_validatebackup/pg_validatebackup.c @@ -43,8 +43,8 @@ #define READ_CHUNK_SIZE 4096 /* - * Information about each file described by the manifest file is parsed to - * produce an object like this. + * Each file described by the manifest file is parsed to produce an object + * like this. 
*/ typedef struct manifestfile { @@ -75,6 +75,29 @@ static uint32 hash_string_pointer(char *s); #define SH_DEFINE #include "lib/simplehash.h" +/* + * Each WAL range described by the manifest file is parsed to produce an + * object like this. + */ +typedef struct manifest_wal_range +{ + TimeLineID tli; + XLogRecPtr start_lsn; + XLogRecPtr end_lsn; + struct manifest_wal_range *next; + struct manifest_wal_range *prev; +} manifest_wal_range; + +/* + * Details we need in callbacks that occur while parsing a backup manifest. + */ +typedef struct parser_context +{ + manifestfiles_hash *ht; + manifest_wal_range *first_wal_range; + manifest_wal_range *last_wal_range; +} parser_context; + /* * All of the context information we need while checking a backup manifest. */ @@ -87,13 +110,18 @@ typedef struct validator_context bool saw_any_error; } validator_context; -static manifestfiles_hash *parse_manifest_file(char *manifest_path); +static void parse_manifest_file(char *manifest_path, manifestfiles_hash **ht_p, + manifest_wal_range **first_wal_range_p); static void record_manifest_details_for_file(JsonManifestParseContext *context, char *pathname, size_t size, pg_checksum_type checksum_type, int checksum_length, uint8 *checksum_payload); +static void record_manifest_details_for_wal_range(JsonManifestParseContext *context, + TimeLineID tli, + XLogRecPtr start_lsn, + XLogRecPtr end_lsn); static void report_manifest_error(JsonManifestParseContext *context, char *fmt, ...) 
pg_attribute_printf(2, 3) pg_attribute_noreturn(); @@ -106,6 +134,10 @@ static void report_extra_backup_files(validator_context *context); static void validate_backup_checksums(validator_context *context); static void validate_file_checksum(validator_context *context, manifestfile *tabent, char *pathname); +static void parse_required_wal(validator_context *context, + char *pg_waldump_path, + char *wal_directory, + manifest_wal_range *first_wal_range); static void report_backup_error(validator_context *context, const char *pg_restrict fmt,...) @@ -128,16 +160,23 @@ main(int argc, char **argv) {"exit-on-error", no_argument, NULL, 'e'}, {"ignore", required_argument, NULL, 'i'}, {"manifest-path", required_argument, NULL, 'm'}, + {"no-parse-wal", no_argument, NULL, 'n'}, + {"print-parse-wal", no_argument, NULL, 'p'}, {"quiet", no_argument, NULL, 'q'}, {"skip-checksums", no_argument, NULL, 's'}, + {"wal-directory", required_argument, NULL, 'w'}, {NULL, 0, NULL, 0} }; int c; validator_context context; + manifest_wal_range *first_wal_range; char *manifest_path = NULL; + bool no_parse_wal = false; bool quiet = false; bool skip_checksums = false; + char *wal_directory = NULL; + char *pg_waldump_path = NULL; pg_logging_init(argv[0]); set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_validatebackup")); @@ -167,7 +206,7 @@ main(int argc, char **argv) * * Ignore the pg_wal directory, because those files are not included in * the backup manifest either, since they are fetched separately from the - * backup itself. + * backup itself, and validated via a separate mechanism. 
* * Ignore postgresql.auto.conf, recovery.signal, and standby.signal, * because we expect that those files may sometimes be created or changed @@ -180,7 +219,7 @@ main(int argc, char **argv) simple_string_list_append(&context.ignore_list, "recovery.signal"); simple_string_list_append(&context.ignore_list, "standby.signal"); - while ((c = getopt_long(argc, argv, "ei:m:qs", long_options, NULL)) != -1) + while ((c = getopt_long(argc, argv, "ei:m:nqsw:", long_options, NULL)) != -1) { switch (c) { @@ -199,12 +238,19 @@ main(int argc, char **argv) manifest_path = pstrdup(optarg); canonicalize_path(manifest_path); break; + case 'n': + no_parse_wal = true; + break; case 'q': quiet = true; break; case 's': skip_checksums = true; break; + case 'w': + wal_directory = pstrdup(optarg); + canonicalize_path(wal_directory); + break; default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); @@ -233,17 +279,50 @@ main(int argc, char **argv) exit(1); } + /* Unless --no-parse-wal was specified, we will need pg_waldump. */ + if (!no_parse_wal) + { + int ret; + + pg_waldump_path = pg_malloc(MAXPGPATH); + ret = find_other_exec(argv[0], "pg_waldump", + "pg_waldump (PostgreSQL) " PG_VERSION "\n", + pg_waldump_path); + if (ret < 0) + { + char full_path[MAXPGPATH]; + + if (find_my_exec(argv[0], full_path) < 0) + strlcpy(full_path, progname, sizeof(full_path)); + if (ret == -1) + pg_log_fatal("The program \"%s\" is needed by %s but was\n" + "not found in the same directory as \"%s\".\n" + "Check your installation.", + "pg_waldump", "pg_validatebackup", full_path); + else + pg_log_fatal("The program \"%s\" was found by \"%s\" but was\n" + "not the same version as %s.\n" + "Check your installation.", + "pg_waldump", full_path, "pg_validatebackup"); + exit(1); + } + } + /* By default, look for the manifest in the backup directory. 
*/ if (manifest_path == NULL) manifest_path = psprintf("%s/backup_manifest", context.backup_directory); + /* By default, look for the WAL in the backup directory, too. */ + if (wal_directory == NULL) + wal_directory = psprintf("%s/pg_wal", context.backup_directory); + /* * Try to read the manifest. We treat any errors encountered while parsing * the manifest as fatal; there doesn't seem to be much point in trying to * validate the backup directory against a corrupted manifest. */ - context.ht = parse_manifest_file(manifest_path); + parse_manifest_file(manifest_path, &context.ht, &first_wal_range); /* * Now scan the files in the backup directory. At this stage, we verify @@ -261,12 +339,20 @@ main(int argc, char **argv) report_extra_backup_files(&context); /* - * Finally, do the expensive work of verifying file checksums, unless we - * were told to skip it. + * Now do the expensive work of verifying file checksums, unless we were + * told to skip it. */ if (!skip_checksums) validate_backup_checksums(&context); + /* + * Try to parse the required ranges of WAL records, unless we were told + * not to do so. + */ + if (!no_parse_wal) + parse_required_wal(&context, pg_waldump_path, + wal_directory, first_wal_range); + /* * If everything looks OK, tell the user this, unless we were asked to * work quietly. @@ -278,11 +364,13 @@ main(int argc, char **argv) } /* - * Parse a manifest file and construct a hash table with information about - * all the files it mentions. + * Parse a manifest file. Construct a hash table with information about + * all the files it mentions, and a linked list of all the WAL ranges it + * mentions. 
*/ -static manifestfiles_hash * -parse_manifest_file(char *manifest_path) +static void +parse_manifest_file(char *manifest_path, manifestfiles_hash **ht_p, + manifest_wal_range **first_wal_range_p) { int fd; struct stat statbuf; @@ -291,6 +379,7 @@ parse_manifest_file(char *manifest_path) manifestfiles_hash *ht; char *buffer; int rc; + parser_context private_context; JsonManifestParseContext context; /* Open the manifest file. */ @@ -329,17 +418,22 @@ parse_manifest_file(char *manifest_path) /* Close the manifest file. */ close(fd); - /* Parse the manifest as JSON. */ - context.private_data = ht; + /* Parse the manifest. */ + private_context.ht = ht; + private_context.first_wal_range = NULL; + private_context.last_wal_range = NULL; + context.private_data = &private_context; context.perfile_cb = record_manifest_details_for_file; + context.perwalrange_cb = record_manifest_details_for_wal_range; context.error_cb = report_manifest_error; json_parse_manifest(&context, buffer, statbuf.st_size); /* Done with the buffer. */ pfree(buffer); - /* Return the hash table we constructed. */ - return ht; + /* Return the file hash table and WAL range list we constructed. */ + *ht_p = ht; + *first_wal_range_p = private_context.first_wal_range; } /* @@ -369,7 +463,8 @@ record_manifest_details_for_file(JsonManifestParseContext *context, pg_checksum_type checksum_type, int checksum_length, uint8 *checksum_payload) { - manifestfiles_hash *ht = context->private_data; + parser_context *pcxt = context->private_data; + manifestfiles_hash *ht = pcxt->ht; manifestfile *tabent; bool found; @@ -388,6 +483,33 @@ record_manifest_details_for_file(JsonManifestParseContext *context, tabent->bad = false; } +/* + * Record details extracted from the backup manifest for one WAL range. 
+ */ +static void +record_manifest_details_for_wal_range(JsonManifestParseContext *context, + TimeLineID tli, + XLogRecPtr start_lsn, XLogRecPtr end_lsn) +{ + parser_context *pcxt = context->private_data; + manifest_wal_range *range; + + /* Allocate and initialize a struct describing this WAL range. */ + range = palloc(sizeof(manifest_wal_range)); + range->tli = tli; + range->start_lsn = start_lsn; + range->end_lsn = end_lsn; + range->prev = pcxt->last_wal_range; + range->next = NULL; + + /* Append it to the list, linking it after the previous tail, if any. */ + if (pcxt->first_wal_range == NULL) + pcxt->first_wal_range = range; + else + pcxt->last_wal_range->next = range; + pcxt->last_wal_range = range; +} + /* * Validate one directory. * @@ -641,6 +763,35 @@ validate_file_checksum(validator_context *context, manifestfile *tabent, relpath); } +/* + * Attempt to parse the WAL files required to restore from backup using + * pg_waldump. + */ +static void +parse_required_wal(validator_context *context, char *pg_waldump_path, + char *wal_directory, manifest_wal_range *first_wal_range) +{ + manifest_wal_range *this_wal_range = first_wal_range; + + while (this_wal_range != NULL) + { + char *pg_waldump_cmd; + + pg_waldump_cmd = psprintf("\"%s\" --quiet --path=\"%s\" --timeline=%u --start=%X/%X --end=%X/%X\n", + pg_waldump_path, wal_directory, this_wal_range->tli, + (uint32) (this_wal_range->start_lsn >> 32), + (uint32) this_wal_range->start_lsn, + (uint32) (this_wal_range->end_lsn >> 32), + (uint32) this_wal_range->end_lsn); + if (system(pg_waldump_cmd) != 0) + report_backup_error(context, + "WAL parsing failed for timeline %u", + this_wal_range->tli); + + this_wal_range = this_wal_range->next; + } +} + /* * Report a problem with the backup. 
* diff --git a/src/bin/pg_validatebackup/t/005_bad_manifest.pl b/src/bin/pg_validatebackup/t/005_bad_manifest.pl index 9c503600d2..23c2f8338c 100644 --- a/src/bin/pg_validatebackup/t/005_bad_manifest.pl +++ b/src/bin/pg_validatebackup/t/005_bad_manifest.pl @@ -7,7 +7,7 @@ use Cwd; use Config; use PostgresNode; use TestLib; -use Test::More tests => 44; +use Test::More tests => 42; my $tempdir = TestLib::tempdir; @@ -37,7 +37,7 @@ test_parse_error('unexpected scalar', <