Re: pg_waldump: support decoding of WAL inside tarfile - Mailing list pgsql-hackers
| From | Tom Lane |
|---|---|
| Subject | Re: pg_waldump: support decoding of WAL inside tarfile |
| Date | |
| Msg-id | 3118179.1775092964@sss.pgh.pa.us Whole thread Raw |
| In response to | Re: pg_waldump: support decoding of WAL inside tarfile (Thomas Munro <thomas.munro@gmail.com>) |
| Responses |
Re: pg_waldump: support decoding of WAL inside tarfile
|
| List | pgsql-hackers |
Thomas Munro <thomas.munro@gmail.com> writes:
> On Thu, Apr 2, 2026 at 7:25 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
>> Also, if we are admitting the possibility that what we are reading
>> was made by a platform-supplied tar and not our own code, I think
>> it verges on lunacy to behave as though unsupported typeflags are
>> regular files.
> Yeah, if this is the first time we parse files we didn't make then
> that makes total sense. I was a bit unsure of that question when I
> suggested we reject pax only after we've failed to find a file, in
> case there are scenarios that work today with harmless ignorable pax
> headers that don't change the file name.
Of course this is all new so far as pg_waldump is concerned.
I'm a bit unclear about whether pg_verifybackup's exposure
is large enough to warrant back-patching any of this.
Looking again at astreamer_tar.c, I suddenly realized that it doesn't
do any meaningful input validation. So if you feed it junk input,
you get garbage errors that aren't even predictable:
$ head -c 32768 /dev/urandom >junk.tar
$ pg_waldump --path junk.tar --start 1/0x10000000
pg_waldump: error: COPY stream ended before last file was finished
$ head -c 32768 /dev/urandom >junk.tar
$ pg_waldump --path junk.tar --start 1/0x10000000
pg_waldump: error: tar member has empty name
$ head -c 32768 /dev/urandom >junk.tar
$ pg_waldump --path junk.tar --start 1/0x10000000
pg_waldump: error: COPY stream ended before last file was finished
$ head -c 32768 /dev/urandom >junk.tar
$ pg_waldump --path junk.tar --start 1/0x10000000
pg_waldump: error: could not find WAL in archive "junk.tar"
tar itself is considerably saner:
$ tar tf junk.tar
tar: This does not look like a tar archive
tar: Skipping to next header
tar: Exiting with failure status due to previous errors
So I think we need something like the attached, in addition
to what I sent before. This just makes astreamer_tar.c use
the isValidTarHeader function that pg_dump already had.
(I decided to const-ify isValidTarHeader's argument while
moving it to a shared location, which in turn requires
const-ifying tarChecksum.)
regards, tom lane
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 271a2c3e481..fecf6f2d1ce 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -44,6 +44,7 @@
#include "pg_backup_archiver.h"
#include "pg_backup_db.h"
#include "pg_backup_utils.h"
+#include "pgtar.h"
#define TEXT_DUMP_HEADER "--\n-- PostgreSQL database dump\n--\n\n"
#define TEXT_DUMPALL_HEADER "--\n-- PostgreSQL database cluster dump\n--\n\n"
@@ -2372,7 +2373,7 @@ _discoverArchiveFormat(ArchiveHandle *AH)
}
if (!isValidTarHeader(AH->lookahead))
- pg_fatal("input file does not appear to be a valid archive");
+ pg_fatal("input file does not appear to be a valid tar archive");
AH->format = archTar;
}
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index 365073b3eae..9c3aca6543a 100644
--- a/src/bin/pg_dump/pg_backup_archiver.h
+++ b/src/bin/pg_dump/pg_backup_archiver.h
@@ -465,8 +465,6 @@ extern void InitArchiveFmt_Null(ArchiveHandle *AH);
extern void InitArchiveFmt_Directory(ArchiveHandle *AH);
extern void InitArchiveFmt_Tar(ArchiveHandle *AH);
-extern bool isValidTarHeader(char *header);
-
extern void ReconnectToServer(ArchiveHandle *AH, const char *dbname);
extern void IssueCommandPerBlob(ArchiveHandle *AH, TocEntry *te,
const char *cmdBegin, const char *cmdEnd);
diff --git a/src/bin/pg_dump/pg_backup_tar.c b/src/bin/pg_dump/pg_backup_tar.c
index d94d0de2a5d..a3879410c94 100644
--- a/src/bin/pg_dump/pg_backup_tar.c
+++ b/src/bin/pg_dump/pg_backup_tar.c
@@ -984,31 +984,6 @@ tarPrintf(TAR_MEMBER *th, const char *fmt,...)
return (int) cnt;
}
-bool
-isValidTarHeader(char *header)
-{
- int sum;
- int chk = tarChecksum(header);
-
- sum = read_tar_number(&header[TAR_OFFSET_CHECKSUM], 8);
-
- if (sum != chk)
- return false;
-
- /* POSIX tar format */
- if (memcmp(&header[TAR_OFFSET_MAGIC], "ustar\0", 6) == 0 &&
- memcmp(&header[TAR_OFFSET_VERSION], "00", 2) == 0)
- return true;
- /* GNU tar format */
- if (memcmp(&header[TAR_OFFSET_MAGIC], "ustar \0", 8) == 0)
- return true;
- /* not-quite-POSIX format written by pre-9.3 pg_dump */
- if (memcmp(&header[TAR_OFFSET_MAGIC], "ustar00\0", 8) == 0)
- return true;
-
- return false;
-}
-
/* Given the member, write the TAR header & copy the file */
static void
_tarAddFile(ArchiveHandle *AH, TAR_MEMBER *th)
diff --git a/src/fe_utils/astreamer_tar.c b/src/fe_utils/astreamer_tar.c
index 3b094fc0328..8fa3329237d 100644
--- a/src/fe_utils/astreamer_tar.c
+++ b/src/fe_utils/astreamer_tar.c
@@ -260,7 +260,8 @@ astreamer_tar_parser_content(astreamer *streamer, astreamer_member *member,
* Parse a file header within a tar stream.
*
* The return value is true if we found a file header and passed it on to the
- * next astreamer; it is false if we have reached the archive trailer.
+ * next astreamer; it is false if we have found the archive trailer.
+ * We throw error if we see invalid data.
*/
static bool
astreamer_tar_header(astreamer_tar_parser *mystreamer)
@@ -289,6 +290,12 @@ astreamer_tar_header(astreamer_tar_parser *mystreamer)
if (!has_nonzero_byte)
return false;
+ /*
+ * Verify that we have a reasonable-looking header.
+ */
+ if (!isValidTarHeader(buffer))
+ pg_fatal("input file does not appear to be a valid tar archive");
+
/*
* Parse key fields out of the header.
*/
diff --git a/src/include/pgtar.h b/src/include/pgtar.h
index eb93bdef5c4..55b8c7c77a4 100644
--- a/src/include/pgtar.h
+++ b/src/include/pgtar.h
@@ -68,7 +68,8 @@ extern enum tarError tarCreateHeader(char *h, const char *filename,
time_t mtime);
extern uint64 read_tar_number(const char *s, int len);
extern void print_tar_number(char *s, int len, uint64 val);
-extern int tarChecksum(char *header);
+extern int tarChecksum(const char *header);
+extern bool isValidTarHeader(const char *header);
/*
* Compute the number of padding bytes required for an entry in a tar
diff --git a/src/port/tar.c b/src/port/tar.c
index 592b4fb7b0f..db462b90292 100644
--- a/src/port/tar.c
+++ b/src/port/tar.c
@@ -87,7 +87,7 @@ read_tar_number(const char *s, int len)
* be 512 bytes, per the tar standard.
*/
int
-tarChecksum(char *header)
+tarChecksum(const char *header)
{
int i,
sum;
@@ -104,6 +104,35 @@ tarChecksum(char *header)
return sum;
}
+/*
+ * Check validity of a tar header (assumed to be 512 bytes long).
+ * We verify the checksum and the magic number / version.
+ */
+bool
+isValidTarHeader(const char *header)
+{
+ int sum;
+ int chk = tarChecksum(header);
+
+ sum = read_tar_number(&header[TAR_OFFSET_CHECKSUM], 8);
+
+ if (sum != chk)
+ return false;
+
+ /* POSIX tar format */
+ if (memcmp(&header[TAR_OFFSET_MAGIC], "ustar\0", 6) == 0 &&
+ memcmp(&header[TAR_OFFSET_VERSION], "00", 2) == 0)
+ return true;
+ /* GNU tar format */
+ if (memcmp(&header[TAR_OFFSET_MAGIC], "ustar \0", 8) == 0)
+ return true;
+ /* not-quite-POSIX format written by pre-9.3 pg_dump */
+ if (memcmp(&header[TAR_OFFSET_MAGIC], "ustar00\0", 8) == 0)
+ return true;
+
+ return false;
+}
+
/*
* Fill in the buffer pointed to by h with a tar format header. This buffer
pgsql-hackers by date: