Re: pg_waldump: support decoding of WAL inside tarfile - Mailing list pgsql-hackers
| From | Tom Lane |
|---|---|
| Subject | Re: pg_waldump: support decoding of WAL inside tarfile |
| Date | |
| Msg-id | 3049460.1775067940@sss.pgh.pa.us Whole thread Raw |
| In response to | Re: pg_waldump: support decoding of WAL inside tarfile (Thomas Munro <thomas.munro@gmail.com>) |
| Responses |
Re: pg_waldump: support decoding of WAL inside tarfile
|
| List | pgsql-hackers |
Thomas Munro <thomas.munro@gmail.com> writes:
> On Mon, Mar 30, 2026 at 11:23 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
>> I agree that this isn't too critical if the effects are confined to
>> pg_waldump. I believe that pg_basebackup and pg_verifybackup also use
>> astreamer_tar.c, but it's not clear to me if they'd ever be asked to
>> parse files made by tar(1) and not by our own sparseness-ignorant
>> tar-writing code. If they can be, that'd be a higher-priority reason
>> to fill in this gap.
> Yeah I can't see any reason why pg_verifybackup --wal-path=foo.tar
> won't suffer the same problem in the wild. Again, it's not the end of
> the world because it'll just fail and you'll probably eventually
> figure out why. So perhaps we should just improve our detection of
> archives that we can't handle?
After reading the POSIX spec for pax format (in the pax(1) man page),
I think it's absolutely essential that we reject files that contain
pax extension headers. Those can change the interpretation of the
following file header(s) in nearly arbitrary ways, so we have plenty
of problems besides this sparse-file issue if we just ignore them.
(Of course, later we can consider improving the code to handle them
correctly, but that ain't happening in time for v19.)
Also, if we are admitting the possibility that what we are reading
was made by a platform-supplied tar and not our own code, I think
it verges on lunacy to behave as though unsupported typeflags are
regular files.
So I think we need something more or less like the attached.
regards, tom lane
diff --git a/src/bin/pg_basebackup/astreamer_inject.c b/src/bin/pg_basebackup/astreamer_inject.c
index 14a96733bad..a5dff0ac1c6 100644
--- a/src/bin/pg_basebackup/astreamer_inject.c
+++ b/src/bin/pg_basebackup/astreamer_inject.c
@@ -224,8 +224,9 @@ astreamer_inject_file(astreamer *streamer, char *pathname, char *data,
strlcpy(member.pathname, pathname, MAXPGPATH);
member.size = len;
member.mode = pg_file_create_mode;
+ member.is_regular = true;
member.is_directory = false;
- member.is_link = false;
+ member.is_symlink = false;
member.linktarget[0] = '\0';
/*
diff --git a/src/bin/pg_verifybackup/astreamer_verify.c b/src/bin/pg_verifybackup/astreamer_verify.c
index 26c98186530..99bd69ce7c9 100644
--- a/src/bin/pg_verifybackup/astreamer_verify.c
+++ b/src/bin/pg_verifybackup/astreamer_verify.c
@@ -165,7 +165,7 @@ member_verify_header(astreamer *streamer, astreamer_member *member)
char pathname[MAXPGPATH];
/* We are only interested in normal files. */
- if (member->is_directory || member->is_link)
+ if (!member->is_regular)
return;
/*
diff --git a/src/bin/pg_waldump/archive_waldump.c b/src/bin/pg_waldump/archive_waldump.c
index ff1cc3fa5f0..e4a4bf44a7e 100644
--- a/src/bin/pg_waldump/archive_waldump.c
+++ b/src/bin/pg_waldump/archive_waldump.c
@@ -815,7 +815,7 @@ member_is_wal_file(astreamer_waldump *mystreamer, astreamer_member *member,
char *filename;
/* We are only interested in normal files */
- if (member->is_directory || member->is_link)
+ if (!member->is_regular)
return false;
if (strlen(member->pathname) < XLOG_FNAME_LEN)
diff --git a/src/fe_utils/astreamer_file.c b/src/fe_utils/astreamer_file.c
index 158e9a14f2c..0fca70a4f86 100644
--- a/src/fe_utils/astreamer_file.c
+++ b/src/fe_utils/astreamer_file.c
@@ -228,9 +228,13 @@ astreamer_extractor_content(astreamer *streamer, astreamer_member *member,
mystreamer->filename[fnamelen - 1] = '\0';
/* Dispatch based on file type. */
- if (member->is_directory)
+ if (member->is_regular)
+ mystreamer->file =
+ create_file_for_extract(mystreamer->filename,
+ member->mode);
+ else if (member->is_directory)
extract_directory(mystreamer->filename, member->mode);
- else if (member->is_link)
+ else if (member->is_symlink)
{
const char *linktarget = member->linktarget;
@@ -238,10 +242,6 @@ astreamer_extractor_content(astreamer *streamer, astreamer_member *member,
linktarget = mystreamer->link_map(linktarget);
extract_link(mystreamer->filename, linktarget);
}
- else
- mystreamer->file =
- create_file_for_extract(mystreamer->filename,
- member->mode);
/* Report output file change. */
if (mystreamer->report_output_file)
diff --git a/src/fe_utils/astreamer_tar.c b/src/fe_utils/astreamer_tar.c
index 3b094fc0328..3de6f8d6fb2 100644
--- a/src/fe_utils/astreamer_tar.c
+++ b/src/fe_utils/astreamer_tar.c
@@ -272,6 +272,9 @@ astreamer_tar_header(astreamer_tar_parser *mystreamer)
Assert(mystreamer->base.bbs_buffer.len == TAR_BLOCK_SIZE);
+ /* Zero out fields of *member, just for consistency. */
+ memset(member, 0, sizeof(astreamer_member));
+
/* Check whether we've got a block of all zero bytes. */
for (i = 0; i < TAR_BLOCK_SIZE; ++i)
{
@@ -299,12 +302,28 @@ astreamer_tar_header(astreamer_tar_parser *mystreamer)
member->mode = read_tar_number(&buffer[TAR_OFFSET_MODE], 8);
member->uid = read_tar_number(&buffer[TAR_OFFSET_UID], 8);
member->gid = read_tar_number(&buffer[TAR_OFFSET_GID], 8);
- member->is_directory =
- (buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_DIRECTORY);
- member->is_link =
- (buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_SYMLINK);
- if (member->is_link)
- strlcpy(member->linktarget, &buffer[TAR_OFFSET_LINKNAME], 100);
+
+ switch (buffer[TAR_OFFSET_TYPEFLAG])
+ {
+ case TAR_FILETYPE_PLAIN:
+ case '\0': /* backwards compatibility hack, per POSIX */
+ member->is_regular = true;
+ break;
+ case TAR_FILETYPE_DIRECTORY:
+ member->is_directory = true;
+ break;
+ case TAR_FILETYPE_SYMLINK:
+ member->is_symlink = true;
+ strlcpy(member->linktarget, &buffer[TAR_OFFSET_LINKNAME], 100);
+ break;
+ case TAR_FILETYPE_PAX_EXTENDED:
+ case TAR_FILETYPE_PAX_EXTENDED_GLOBAL:
+ pg_fatal("pax extensions to tar format are not supported");
+ break;
+ default:
+ /* For special files, set none of the three is_xxx flags */
+ break;
+ }
/* Compute number of padding bytes. */
mystreamer->pad_bytes_expected = tarPaddingBytesRequired(member->size);
diff --git a/src/include/fe_utils/astreamer.h b/src/include/fe_utils/astreamer.h
index f370ef62720..8329e4efbc5 100644
--- a/src/include/fe_utils/astreamer.h
+++ b/src/include/fe_utils/astreamer.h
@@ -83,8 +83,10 @@ typedef struct
mode_t mode;
uid_t uid;
gid_t gid;
+ /* note: special filetypes will set none of these flags */
+ bool is_regular;
bool is_directory;
- bool is_link;
+ bool is_symlink;
char linktarget[MAXPGPATH];
} astreamer_member;
diff --git a/src/include/pgtar.h b/src/include/pgtar.h
index eb93bdef5c4..d6c434e09b0 100644
--- a/src/include/pgtar.h
+++ b/src/include/pgtar.h
@@ -60,6 +60,8 @@ enum tarFileType
TAR_FILETYPE_PLAIN = '0',
TAR_FILETYPE_SYMLINK = '2',
TAR_FILETYPE_DIRECTORY = '5',
+ TAR_FILETYPE_PAX_EXTENDED = 'x',
+ TAR_FILETYPE_PAX_EXTENDED_GLOBAL = 'g',
};
extern enum tarError tarCreateHeader(char *h, const char *filename,
pgsql-hackers by date: