From 1d3dac88a58c799b2c047b52d3d66e97bbee2a8b Mon Sep 17 00:00:00 2001 From: Nitin Motiani Date: Sun, 3 May 2026 12:37:46 +0000 Subject: [PATCH v11 3/5] Fixes and refactors in pipe command Fix pclose bug with fdopen case for stdout by ensuring fclose is called. Add better error handling to pclose and show a clearer error message using wait_result_to_str() Changed pipe-command flag to pipe as recommended in review. Change the mode from 'ab' to 'w' for large object TOC. Use appendShellString in file substitutions for shell escaping. Refactor and document the code. --- src/bin/pg_dump/compress_gzip.c | 6 +- src/bin/pg_dump/compress_gzip.h | 2 +- src/bin/pg_dump/compress_io.c | 25 +++--- src/bin/pg_dump/compress_io.h | 6 +- src/bin/pg_dump/compress_lz4.c | 8 +- src/bin/pg_dump/compress_lz4.h | 2 +- src/bin/pg_dump/compress_none.c | 61 +++++++++---- src/bin/pg_dump/compress_none.h | 2 +- src/bin/pg_dump/compress_zstd.c | 8 +- src/bin/pg_dump/compress_zstd.h | 2 +- src/bin/pg_dump/pg_backup.h | 4 +- src/bin/pg_dump/pg_backup_archiver.c | 30 ++++++- src/bin/pg_dump/pg_backup_archiver.h | 2 +- src/bin/pg_dump/pg_backup_directory.c | 118 +++++++++++--------------- src/bin/pg_dump/pg_dump.c | 53 +++++------- src/bin/pg_dump/pg_dumpall.c | 9 ++ src/bin/pg_dump/pg_restore.c | 69 ++++++++------- 17 files changed, 225 insertions(+), 182 deletions(-) diff --git a/src/bin/pg_dump/compress_gzip.c b/src/bin/pg_dump/compress_gzip.c index 0ce15847d9a..6a02f9b3907 100644 --- a/src/bin/pg_dump/compress_gzip.c +++ b/src/bin/pg_dump/compress_gzip.c @@ -430,9 +430,9 @@ Gzip_open_write(const char *path, const char *mode, CompressFileHandle *CFH) void InitCompressFileHandleGzip(CompressFileHandle *CFH, const pg_compress_specification compression_spec, - bool path_is_pipe_command) + bool is_pipe) { - if (path_is_pipe_command) + if (is_pipe) pg_fatal("cPipe command not supported for Gzip"); CFH->open_func = Gzip_open; @@ -460,7 +460,7 @@ InitCompressorGzip(CompressorState *cs, void InitCompressFileHandleGzip(CompressFileHandle *CFH, const pg_compress_specification compression_spec, - bool path_is_pipe_command) + bool is_pipe) { pg_fatal("this build does not support compression with %s", "gzip"); } diff --git a/src/bin/pg_dump/compress_gzip.h b/src/bin/pg_dump/compress_gzip.h index f77c5c86c56..952c9223836 100644 --- a/src/bin/pg_dump/compress_gzip.h +++ b/src/bin/pg_dump/compress_gzip.h @@ -20,6 +20,6 @@ extern void InitCompressorGzip(CompressorState *cs, const pg_compress_specification compression_spec); extern void InitCompressFileHandleGzip(CompressFileHandle *CFH, const pg_compress_specification compression_spec, - bool path_is_pipe_command); + bool is_pipe); #endif /* _COMPRESS_GZIP_H_ */ diff --git a/src/bin/pg_dump/compress_io.c b/src/bin/pg_dump/compress_io.c index 88488186b34..b4d84ef17d1 100644 --- a/src/bin/pg_dump/compress_io.c +++ b/src/bin/pg_dump/compress_io.c @@ -192,28 +192,27 @@ free_keep_errno(void *p) */ CompressFileHandle * InitCompressFileHandle(const pg_compress_specification compression_spec, - bool path_is_pipe_command) + bool is_pipe) { CompressFileHandle *CFH; CFH = pg_malloc0_object(CompressFileHandle); /* - * Always set to non-compressed when path_is_pipe_command assuming that - * external compressor as part of pipe is more efficient. Can review in - * the future. + * Always set to non-compressed when is_pipe assuming that external + * compressor as part of pipe is more efficient. Can review in the future. */ - if (path_is_pipe_command) - InitCompressFileHandleNone(CFH, compression_spec, path_is_pipe_command); + if (is_pipe) + InitCompressFileHandleNone(CFH, compression_spec, is_pipe); else if (compression_spec.algorithm == PG_COMPRESSION_NONE) - InitCompressFileHandleNone(CFH, compression_spec, path_is_pipe_command); + InitCompressFileHandleNone(CFH, compression_spec, is_pipe); else if (compression_spec.algorithm == PG_COMPRESSION_GZIP) - InitCompressFileHandleGzip(CFH, compression_spec, path_is_pipe_command); + InitCompressFileHandleGzip(CFH, compression_spec, is_pipe); else if (compression_spec.algorithm == PG_COMPRESSION_LZ4) - InitCompressFileHandleLZ4(CFH, compression_spec, path_is_pipe_command); + InitCompressFileHandleLZ4(CFH, compression_spec, is_pipe); else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD) - InitCompressFileHandleZstd(CFH, compression_spec, path_is_pipe_command); + InitCompressFileHandleZstd(CFH, compression_spec, is_pipe); return CFH; } @@ -247,7 +246,7 @@ check_compressed_file(const char *path, char **fname, char *ext) */ CompressFileHandle * InitDiscoverCompressFileHandle(const char *path, const char *mode, - bool path_is_pipe_command) + bool is_pipe) { CompressFileHandle *CFH = NULL; struct stat st; @@ -263,7 +262,7 @@ InitDiscoverCompressFileHandle(const char *path, const char *mode, /* * If the path is a pipe command, the compression algorithm is none. */ - if (!path_is_pipe_command) + if (!is_pipe) { if (hasSuffix(fname, ".gz")) compression_spec.algorithm = PG_COMPRESSION_GZIP; @@ -284,7 +283,7 @@ InitDiscoverCompressFileHandle(const char *path, const char *mode, } } - CFH = InitCompressFileHandle(compression_spec, path_is_pipe_command); + CFH = InitCompressFileHandle(compression_spec, is_pipe); errno = 0; if (!CFH->open_func(fname, -1, mode, CFH)) { diff --git a/src/bin/pg_dump/compress_io.h b/src/bin/pg_dump/compress_io.h index bd0fc2634dc..3857eff2179 100644 --- a/src/bin/pg_dump/compress_io.h +++ b/src/bin/pg_dump/compress_io.h @@ -189,7 +189,7 @@ struct CompressFileHandle /* * Compression specification for this file handle. */ - bool path_is_pipe_command; + bool is_pipe; /* * Private data to be used by the compressor. @@ -201,7 +201,7 @@ struct CompressFileHandle * Initialize a compress file handle with the requested compression. */ extern CompressFileHandle *InitCompressFileHandle(const pg_compress_specification compression_spec, - bool path_is_pipe_command); + bool is_pipe); /* * Initialize a compress file stream. Infer the compression algorithm @@ -210,6 +210,6 @@ extern CompressFileHandle *InitCompressFileHandle(const pg_compress_specificatio */ extern CompressFileHandle *InitDiscoverCompressFileHandle(const char *path, const char *mode, - bool path_is_pipe_command); + bool is_pipe); extern bool EndCompressFileHandle(CompressFileHandle *CFH); #endif diff --git a/src/bin/pg_dump/compress_lz4.c b/src/bin/pg_dump/compress_lz4.c index 2bc4c37c5db..79595556715 100644 --- a/src/bin/pg_dump/compress_lz4.c +++ b/src/bin/pg_dump/compress_lz4.c @@ -767,11 +767,11 @@ LZ4Stream_open_write(const char *path, const char *mode, CompressFileHandle *CFH void InitCompressFileHandleLZ4(CompressFileHandle *CFH, const pg_compress_specification compression_spec, - bool path_is_pipe_command) + bool is_pipe) { LZ4State *state; - if (path_is_pipe_command) + if (is_pipe) pg_fatal("Pipe command not supported for LZ4"); CFH->open_func = LZ4Stream_open; @@ -789,7 +789,7 @@ InitCompressFileHandleLZ4(CompressFileHandle *CFH, if (CFH->compression_spec.level >= 0) state->prefs.compressionLevel = CFH->compression_spec.level; - CFH->path_is_pipe_command = path_is_pipe_command; + CFH->is_pipe = is_pipe; CFH->private_data = state; } @@ -804,7 +804,7 @@ InitCompressorLZ4(CompressorState *cs, void InitCompressFileHandleLZ4(CompressFileHandle *CFH, const pg_compress_specification compression_spec, - bool path_is_pipe_command) + bool is_pipe) { pg_fatal("this build does not support compression with %s", "LZ4"); } diff --git a/src/bin/pg_dump/compress_lz4.h b/src/bin/pg_dump/compress_lz4.h index 490141ee8a1..2c235cf3a50 100644 --- a/src/bin/pg_dump/compress_lz4.h +++ b/src/bin/pg_dump/compress_lz4.h @@ -20,6 +20,6 @@ extern void InitCompressorLZ4(CompressorState *cs, const pg_compress_specification compression_spec); extern void InitCompressFileHandleLZ4(CompressFileHandle *CFH, const pg_compress_specification compression_spec, - bool path_is_pipe_command); + bool is_pipe); #endif /* _COMPRESS_LZ4_H_ */ diff --git a/src/bin/pg_dump/compress_none.c b/src/bin/pg_dump/compress_none.c index 8c2f95b520d..2dae62aadd4 100644 --- a/src/bin/pg_dump/compress_none.c +++ b/src/bin/pg_dump/compress_none.c @@ -14,6 +14,7 @@ #include "postgres_fe.h" #include +#include "port.h" #include "compress_none.h" #include "pg_backup_utils.h" @@ -210,13 +211,31 @@ close_none(CompressFileHandle *CFH) if (fp) { - errno = 0; - if (CFH->path_is_pipe_command) + if (CFH->is_pipe) + { ret = pclose(fp); + if (ret != 0) + { + /* + * For pipe commands, pclose() returns the exit status of the + * child process. If the shell command itself fails (e.g. + * "command not found"), pclose() will return a non-zero exit + * status, but errno will likely remain 0 (Success). We use + * wait_result_to_str to decode the status and pg_fatal to + * prevent the caller from logging a generic and misleading + * "could not close file: Success" message. + */ + char *reason = wait_result_to_str(ret); + + pg_fatal("pipe command failed: %s", reason); + } + } else + { ret = fclose(fp); - if (ret != 0) - pg_log_error("could not close file: %m"); + if (ret != 0) + pg_fatal("could not close file: %m"); + } } return ret == 0; @@ -228,6 +247,23 @@ eof_none(CompressFileHandle *CFH) return feof((FILE *) CFH->private_data) != 0; } +static FILE * +open_handle_none(const char *path, const char *mode, bool is_pipe) +{ + if (is_pipe) + { + /* + * If the path is a pipe, we use popen(). Note that we do not track + * the child PID for cleanup during fatal errors. We intentionally + * rely on standard POSIX semantics: if pg_dump crashes, the OS will + * close our end of the pipe, sending EOF to the child process, which + * will then cleanly exit on its own. + */ + return popen(path, mode); + } + return fopen(path, mode); +} + static bool open_none(const char *path, int fd, const char *mode, CompressFileHandle *CFH) { @@ -236,6 +272,7 @@ open_none(const char *path, int fd, const char *mode, CompressFileHandle *CFH) if (fd >= 0) { int dup_fd = dup(fd); + if (dup_fd < 0) return false; CFH->private_data = fdopen(dup_fd, mode); @@ -247,10 +284,7 @@ open_none(const char *path, int fd, const char *mode, CompressFileHandle *CFH) } else { - if (CFH->path_is_pipe_command) - CFH->private_data = popen(path, mode); - else - CFH->private_data = fopen(path, mode); + CFH->private_data = open_handle_none(path, mode, CFH->is_pipe); if (CFH->private_data == NULL) return false; @@ -265,12 +299,9 @@ open_write_none(const char *path, const char *mode, CompressFileHandle *CFH) Assert(CFH->private_data == NULL); pg_log_debug("Opening %s, pipe is %s", - path, CFH->path_is_pipe_command ? "true" : "false"); + path, CFH->is_pipe ? "true" : "false"); - if (CFH->path_is_pipe_command) - CFH->private_data = popen(path, mode); - else - CFH->private_data = fopen(path, mode); + CFH->private_data = open_handle_none(path, mode, CFH->is_pipe); if (CFH->private_data == NULL) return false; @@ -285,7 +316,7 @@ open_write_none(const char *path, const char *mode, CompressFileHandle *CFH) void InitCompressFileHandleNone(CompressFileHandle *CFH, const pg_compress_specification compression_spec, - bool path_is_pipe_command) + bool is_pipe) { CFH->open_func = open_none; CFH->open_write_func = open_write_none; @@ -297,7 +328,7 @@ InitCompressFileHandleNone(CompressFileHandle *CFH, CFH->eof_func = eof_none; CFH->get_error_func = get_error_none; - CFH->path_is_pipe_command = path_is_pipe_command; + CFH->is_pipe = is_pipe; CFH->private_data = NULL; } diff --git a/src/bin/pg_dump/compress_none.h b/src/bin/pg_dump/compress_none.h index d898a2d411c..57943ceff7f 100644 --- a/src/bin/pg_dump/compress_none.h +++ b/src/bin/pg_dump/compress_none.h @@ -20,6 +20,6 @@ extern void InitCompressorNone(CompressorState *cs, const pg_compress_specification compression_spec); extern void InitCompressFileHandleNone(CompressFileHandle *CFH, const pg_compress_specification compression_spec, - bool path_is_pipe_command); + bool is_pipe); #endif /* _COMPRESS_NONE_H_ */ diff --git a/src/bin/pg_dump/compress_zstd.c b/src/bin/pg_dump/compress_zstd.c index e4830d35ec0..57c4ad16500 100644 --- a/src/bin/pg_dump/compress_zstd.c +++ b/src/bin/pg_dump/compress_zstd.c @@ -28,7 +28,7 @@ InitCompressorZstd(CompressorState *cs, const pg_compress_specification compress void InitCompressFileHandleZstd(CompressFileHandle *CFH, const pg_compress_specification compression_spec, - bool path_is_pipe_command) + bool is_pipe) { pg_fatal("this build does not support compression with %s", "ZSTD"); } @@ -576,9 +576,9 @@ Zstd_get_error(CompressFileHandle *CFH) void InitCompressFileHandleZstd(CompressFileHandle *CFH, const pg_compress_specification compression_spec, - bool path_is_pipe_command) + bool is_pipe) { - if (path_is_pipe_command) + if (is_pipe) pg_fatal("Pipe command not supported for Zstd"); CFH->open_func = Zstd_open; @@ -592,7 +592,7 @@ InitCompressFileHandleZstd(CompressFileHandle *CFH, CFH->get_error_func = Zstd_get_error; CFH->compression_spec = compression_spec; - CFH->path_is_pipe_command = path_is_pipe_command; + CFH->is_pipe = is_pipe; CFH->private_data = NULL; } diff --git a/src/bin/pg_dump/compress_zstd.h b/src/bin/pg_dump/compress_zstd.h index 1f23e7266bf..8b06657bc80 100644 --- a/src/bin/pg_dump/compress_zstd.h +++ b/src/bin/pg_dump/compress_zstd.h @@ -21,6 +21,6 @@ extern void InitCompressorZstd(CompressorState *cs, const pg_compress_specification compression_spec); extern void InitCompressFileHandleZstd(CompressFileHandle *CFH, const pg_compress_specification compression_spec, - bool path_is_pipe_command); + bool is_pipe); #endif /* COMPRESS_ZSTD_H */ diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index 6466bd4bded..8efeb549d76 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -316,7 +316,7 @@ extern void ProcessArchiveRestoreOptions(Archive *AHX); extern void RestoreArchive(Archive *AHX, bool append_data); /* Open an existing archive */ -extern Archive *OpenArchive(const char *FileSpec, const ArchiveFormat fmt, bool FileSpecIsPipe); +extern Archive *OpenArchive(const char *FileSpec, const ArchiveFormat fmt, bool is_pipe); /* Create a new archive */ extern Archive *CreateArchive(const char *FileSpec, const ArchiveFormat fmt, @@ -324,7 +324,7 @@ extern Archive *CreateArchive(const char *FileSpec, const ArchiveFormat fmt, bool dosync, ArchiveMode mode, SetupWorkerPtrType setupDumpWorker, DataDirSyncMethod sync_method, - bool FileSpecIsPipe); + bool is_pipe); /* The --list option */ extern void PrintTOCSummary(Archive *AHX); diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index 4ef9dae49ed..107173f2b53 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -1744,7 +1744,19 @@ SetOutput(ArchiveHandle *AH, const char *filename, else mode = PG_BINARY_W; - CFH = InitCompressFileHandle(compression_spec, AH->fSpecIsPipe); + /* + * The output handle (usually stdout) should never be a pipe command + * managed by our popen logic, even if the archive itself is a pipe. Our + * pipe command implementation for directory mode is a template for the + * data files, not for this primary output stream. + * + * Furthermore, marking this as a pipe command would cause it to be closed + * with pclose() instead of fclose(). Since this handle is opened via + * fdopen() (for stdout) or fopen() (for a regular file), using pclose() + * on it is a bug that causes failures on BSD-based systems (like FreeBSD + * or macOS). + */ + CFH = InitCompressFileHandle(compression_spec, false); if (!CFH->open_func(filename, fn, mode, CFH)) { @@ -2442,7 +2454,7 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt, else AH->fSpec = NULL; - AH->fSpecIsPipe = FileSpecIsPipe; + AH->is_pipe = FileSpecIsPipe; AH->currUser = NULL; /* unknown */ AH->currSchema = NULL; /* ditto */ @@ -2463,7 +2475,19 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt, /* Open stdout with no compression for AH output handle */ out_compress_spec.algorithm = PG_COMPRESSION_NONE; - CFH = InitCompressFileHandle(out_compress_spec, AH->fSpecIsPipe); + + /* + * The output handle (usually stdout) should never be a pipe command + * managed by our popen logic, even if the archive itself is a pipe. Our + * pipe command implementation for directory mode is a template for the + * data files, not for this primary output stream. + * + * Furthermore, marking this as a pipe command would cause it to be closed + * with pclose() instead of fclose(). Since this handle is opened via + * fdopen() (for stdout), using pclose() on it is a bug that causes + * failures on BSD-based systems (like FreeBSD or macOS). + */ + CFH = InitCompressFileHandle(out_compress_spec, false); if (!CFH->open_func(NULL, fileno(stdout), PG_BINARY_A, CFH)) pg_fatal("could not open stdout for appending: %m"); AH->OF = CFH; diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h index 9fdb67c109d..0384b39bd97 100644 --- a/src/bin/pg_dump/pg_backup_archiver.h +++ b/src/bin/pg_dump/pg_backup_archiver.h @@ -301,7 +301,7 @@ struct _archiveHandle int loCount; /* # of LOs restored */ char *fSpec; /* Archive File Spec */ - bool fSpecIsPipe; /* fSpec is a pipe command template requiring + bool is_pipe; /* fSpec is a pipe command template requiring * replacing %f with file name */ FILE *FH; /* General purpose file handle */ void *OF; /* Output file */ diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c index 2b18c3c8270..d3b9be5317e 100644 --- a/src/bin/pg_dump/pg_backup_directory.c +++ b/src/bin/pg_dump/pg_backup_directory.c @@ -43,6 +43,7 @@ #include "common/percentrepl.h" #include "compress_io.h" #include "dumputils.h" +#include "fe_utils/string_utils.h" #include "parallel.h" #include "pg_backup_utils.h" @@ -158,7 +159,7 @@ InitArchiveFmt_Directory(ArchiveHandle *AH) if (AH->mode == archModeWrite) { - if (!AH->fSpecIsPipe) /* no checks for pipe */ + if (!AH->is_pipe) /* no checks for pipe */ { /* we accept an empty existing directory */ create_or_open_dir(ctx->directory); @@ -171,7 +172,7 @@ InitArchiveFmt_Directory(ArchiveHandle *AH) setFilePath(AH, fname, "toc.dat"); - tocFH = InitDiscoverCompressFileHandle(fname, PG_BINARY_R, AH->fSpecIsPipe); + tocFH = InitDiscoverCompressFileHandle(fname, PG_BINARY_R, AH->is_pipe); if (tocFH == NULL) pg_fatal("could not open input file \"%s\": %m", fname); @@ -299,7 +300,7 @@ _StartData(ArchiveHandle *AH, TocEntry *te) setFilePath(AH, fname, tctx->filename); - ctx->dataFH = InitCompressFileHandle(AH->compression_spec, AH->fSpecIsPipe); + ctx->dataFH = InitCompressFileHandle(AH->compression_spec, AH->is_pipe); if (!ctx->dataFH->open_write_func(fname, PG_BINARY_W, ctx->dataFH)) pg_fatal("could not open output file \"%s\": %m", fname); @@ -357,7 +358,7 @@ _PrintFileData(ArchiveHandle *AH, char *filename) if (!filename) return; - CFH = InitDiscoverCompressFileHandle(filename, PG_BINARY_R, AH->fSpecIsPipe); + CFH = InitDiscoverCompressFileHandle(filename, PG_BINARY_R, AH->is_pipe); if (!CFH) pg_fatal("could not open input file \"%s\": %m", filename); @@ -420,7 +421,7 @@ _LoadLOs(ArchiveHandle *AH, TocEntry *te) else setFilePath(AH, tocfname, tctx->filename); - CFH = ctx->LOsTocFH = InitDiscoverCompressFileHandle(tocfname, PG_BINARY_R, AH->fSpecIsPipe); + CFH = ctx->LOsTocFH = InitDiscoverCompressFileHandle(tocfname, PG_BINARY_R, AH->is_pipe); if (ctx->LOsTocFH == NULL) pg_fatal("could not open large object TOC file \"%s\" for input: %m", @@ -431,7 +432,6 @@ _LoadLOs(ArchiveHandle *AH, TocEntry *te) { char lofname[MAXPGPATH + 1]; char path[MAXPGPATH]; - char *pipe; /* Can't overflow because line and lofname are the same length */ if (sscanf(line, "%u %" CppAsString2(MAXPGPATH) "s\n", &oid, lofname) != 2) @@ -440,20 +440,8 @@ _LoadLOs(ArchiveHandle *AH, TocEntry *te) StartRestoreLO(AH, oid, AH->public.ropt->dropSchema); - /* - * XXX : Create a helper function for blob files naming common to - * _LoadLOs an _StartLO. - */ - if (AH->fSpecIsPipe) - { - pipe = replace_percent_placeholders(ctx->directory, "pipe-command", "f", lofname); - strcpy(path, pipe); - pfree(pipe); - } - else - { - snprintf(path, MAXPGPATH, "%s/%s", ctx->directory, lofname); - } + setFilePath(AH, path, lofname); + _PrintFileData(AH, path); EndRestoreLO(AH, oid); } @@ -564,7 +552,7 @@ _CloseArchive(ArchiveHandle *AH) /* The TOC is always created uncompressed */ compression_spec.algorithm = PG_COMPRESSION_NONE; - tocFH = InitCompressFileHandle(compression_spec, AH->fSpecIsPipe); + tocFH = InitCompressFileHandle(compression_spec, AH->is_pipe); if (!tocFH->open_write_func(fname, PG_BINARY_W, tocFH)) pg_fatal("could not open output file \"%s\": %m", fname); ctx->dataFH = tocFH; @@ -631,39 +619,27 @@ _StartLOs(ArchiveHandle *AH, TocEntry *te) /* The LO TOC file is never compressed */ compression_spec.algorithm = PG_COMPRESSION_NONE; - ctx->LOsTocFH = InitCompressFileHandle(compression_spec, AH->fSpecIsPipe); + ctx->LOsTocFH = InitCompressFileHandle(compression_spec, AH->is_pipe); /* - * XXX: We can probably simplify this code by using the mode 'w' for all - * cases. The current implementation is due to historical reason that the - * mode for the LOs TOC file has been "ab" from the start. That is - * something we can't do for pipe-command as popen only supports read and - * write. So here a different mode is used for pipes. + * We use 'w' (PG_BINARY_W) mode for the LOs TOC file in all cases. + * Historically, the mode for this file was "ab". However, append mode is + * entirely redundant due to how large objects are partitioned. * - * But in future we can evaluate using 'w' for everything.there is one - * ToCEntry There is only one ToCEntry per blob group. And it is written - * by @WriteDataChunksForToCEntry. This function calls _StartLOs once - * before the dumper function and and _EndLOs once after the dumper. And - * the dumper dumps all the LOs in the group. So a blob_NNN.toc is only - * opened once and closed after all the entries are written. Therefore the - * mode can be made 'w' for all the cases. We tested changing the mode to - * PG_BINARY_W and the tests passed. But in case there are some missing - * scenarios, we have not made that change here. Instead for now only - * doing it for the pipe command. + * pg_dump splits large objects into chunks of up to 1000 blobs per + * archive entry. Each chunk receives a completely unique dumpId, and the + * TOC file is named using that ID (e.g., blobs_123.toc). Furthermore, + * WriteDataChunksForTocEntry ensures a strict sequential lifecycle for + * each entry: it calls _StartLOs (opens the file), then the dumper + * function (writes the chunk), and finally _EndLOs (closes the file). * - * Another alternative is to keep the 'ab' mode for regular files and use - * 'w' mode for pipe files but now also cache the pipe handle to keep it - * open till all the LOs in the dump group are done. This is not needed - * because of the same reason listed above that a file handle is only - * opened once. In short there are 3 solutions : 1. Change the mode for - * everything (preferred) 2. Change it only for pipe-command (current) 3. - * Change it for pipe-command and then cache those handles and close them - * in the end (not needed). + * Because a blobs_NNN.toc file is guaranteed to be unique and is only + * opened exactly once, written to sequentially, and then closed forever, + * there is no scenario where "ab" is required. This change to "w" is + * necessary because popen() for pipe-commands only supports "r" and "w". */ - if (AH->fSpecIsPipe) - mode = PG_BINARY_W; - else - mode = "ab"; + mode = PG_BINARY_W; + if (!ctx->LOsTocFH->open_write_func(fname, mode, ctx->LOsTocFH)) pg_fatal("could not open output file \"%s\": %m", fname); } @@ -678,22 +654,12 @@ _StartLO(ArchiveHandle *AH, TocEntry *te, Oid oid) { lclContext *ctx = (lclContext *) AH->formatData; char fname[MAXPGPATH]; - char *pipe; char blob_name[MAXPGPATH]; - if (AH->fSpecIsPipe) - { - snprintf(blob_name, MAXPGPATH, "blob_%u.dat", oid); - pipe = replace_percent_placeholders(ctx->directory, "pipe-command", "f", blob_name); - strcpy(fname, pipe); - pfree(pipe); - } - else - { - snprintf(fname, MAXPGPATH, "%s/blob_%u.dat", ctx->directory, oid); - } + snprintf(blob_name, MAXPGPATH, "blob_%u.dat", oid); + setFilePath(AH, fname, blob_name); - ctx->dataFH = InitCompressFileHandle(AH->compression_spec, AH->fSpecIsPipe); + ctx->dataFH = InitCompressFileHandle(AH->compression_spec, AH->is_pipe); if (!ctx->dataFH->open_write_func(fname, PG_BINARY_W, ctx->dataFH)) pg_fatal("could not open output file \"%s\": %m", fname); } @@ -752,11 +718,30 @@ setFilePath(ArchiveHandle *AH, char *buf, const char *relativeFilename) dname = ctx->directory; - if (AH->fSpecIsPipe) + if (AH->is_pipe) { - pipe = replace_percent_placeholders(dname, "pipe-command", "f", relativeFilename); + PQExpBuffer esc_filename = createPQExpBuffer(); + + /* + * Security: Escape the filename before substituting it into the + * command string. This prevents shell injection if the filename + * contains special characters (like spaces, quotes, or semicolons). + * While internal filenames used by directory format (like toc.dat, + * 1234.dat, blob_567.dat) are currently safe, this follows defensive + * programming practices to ensure the pipeline remains secure even if + * internal naming conventions change. + */ + appendShellString(esc_filename, relativeFilename); + + pipe = replace_percent_placeholders(dname, "pipe", "f", esc_filename->data); + + if (strlen(pipe) >= MAXPGPATH) + pg_fatal("pipe command too long: \"%s\"", pipe); + strcpy(buf, pipe); + pfree(pipe); + destroyPQExpBuffer(esc_filename); } else /* replace all ocurrences of %f in dname with * relativeFilename */ @@ -809,23 +794,18 @@ _PrepParallelRestore(ArchiveHandle *AH) * only need an approximate indicator of that. */ setFilePath(AH, fname, tctx->filename); - pg_log_error("filename: %s", fname); if (stat(fname, &st) == 0) te->dataLength = st.st_size; else if (AH->compression_spec.algorithm != PG_COMPRESSION_NONE) { - if (AH->fSpecIsPipe) - pg_log_error("pipe and compressed"); if (AH->compression_spec.algorithm == PG_COMPRESSION_GZIP) strlcat(fname, ".gz", sizeof(fname)); else if (AH->compression_spec.algorithm == PG_COMPRESSION_LZ4) strlcat(fname, ".lz4", sizeof(fname)); else if (AH->compression_spec.algorithm == PG_COMPRESSION_ZSTD) { - pg_log_error("filename: %s", fname); strlcat(fname, ".zst", sizeof(fname)); - pg_log_error("filename: %s", fname); } if (stat(fname, &st) == 0) diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 7345e6c7a4b..21157c568b8 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -419,7 +419,8 @@ main(int argc, char **argv) { int c; const char *filename = NULL; - bool filename_is_pipe = false; + char *pipe_command = NULL; + bool is_pipe = false; const char *format = "p"; TableInfo *tblinfo; int numTables; @@ -536,7 +537,7 @@ main(int argc, char **argv) {"exclude-extension", required_argument, NULL, 17}, {"sequence-data", no_argument, &dopt.sequence_data, 1}, {"restrict-key", required_argument, NULL, 25}, - {"pipe-command", required_argument, NULL, 26}, + {"pipe", required_argument, NULL, 26}, {NULL, 0, NULL, 0} }; @@ -608,14 +609,9 @@ main(int argc, char **argv) break; case 'f': - if (filename != NULL) - { - pg_log_error_hint("Only one of [--file, --pipe-command] allowed"); - exit_nicely(1); - } filename = pg_strdup(optarg); - filename_is_pipe = false; /* it already is, setting again - * here just for clarity */ + is_pipe = false; /* it already is, setting again here just + * for clarity */ break; case 'F': @@ -809,13 +805,8 @@ main(int argc, char **argv) break; case 26: /* pipe command */ - if (filename != NULL) - { - pg_log_error_hint("Only one of [--file, --pipe-command] allowed"); - exit_nicely(1); - } - filename = pg_strdup(optarg); - filename_is_pipe = true; + pipe_command = pg_strdup(optarg); + is_pipe = true; break; default: @@ -825,6 +816,10 @@ main(int argc, char **argv) } } + if (filename && pipe_command) + pg_fatal("options %s and %s cannot be used together", + "-f/--file", "--pipe"); + /* * Non-option argument specifies database name as long as it wasn't * already specified with -d / --dbname @@ -926,26 +921,20 @@ main(int argc, char **argv) else if (dopt.restrict_key) pg_fatal("option %s can only be used with %s", "--restrict-key", "--format=plain"); - if (filename_is_pipe && archiveFormat != archDirectory) - { - pg_log_error_hint("Option --pipe-command is only supported with directory format."); - exit_nicely(1); - } + if (is_pipe && archiveFormat != archDirectory) + pg_fatal("option --pipe is only supported with directory format"); - if (filename_is_pipe && strcmp(compression_algorithm_str, "none") != 0) - { - pg_log_error_hint("Option --pipe-command is not supported with any compression type."); - exit_nicely(1); - } + if (is_pipe && strcmp(compression_algorithm_str, "none") != 0) + pg_fatal("option --pipe is not supported with any compression type"); /* * Custom and directory formats are compressed by default with gzip when * available, not the others. If gzip is not available, no compression is - * done by default. If directory format is being used with pipe-command, - * no compression is done. + * done by default. If directory format is being used with pipe, no + * compression is done. */ if ((archiveFormat == archCustom || archiveFormat == archDirectory) && - !filename_is_pipe && !user_compression_defined) + !is_pipe && !user_compression_defined) { #ifdef HAVE_LIBZ compression_algorithm_str = "gzip"; @@ -994,8 +983,8 @@ main(int argc, char **argv) pg_fatal("parallel backup only supported by the directory format"); /* Open the output file */ - fout = CreateArchive(filename, archiveFormat, compression_spec, - dosync, archiveMode, setupDumpWorker, sync_method, filename_is_pipe); + fout = CreateArchive(is_pipe ? pipe_command : filename, archiveFormat, compression_spec, + dosync, archiveMode, setupDumpWorker, sync_method, is_pipe); /* Make dump options accessible right away */ SetArchiveOptions(fout, &dopt, NULL); @@ -1327,6 +1316,8 @@ help(const char *progname) printf(_("\nGeneral options:\n")); printf(_(" -f, --file=FILENAME output file or directory name\n")); + printf(_(" --pipe=COMMAND execute command for each output file and\n" + " write data to it via pipe\n")); printf(_(" -F, --format=c|d|t|p output file format (custom, directory, tar,\n" " plain text (default))\n")); printf(_(" -j, --jobs=NUM use this many parallel jobs to dump\n")); diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index 2d551365180..bf69a44fa23 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -298,6 +298,15 @@ main(int argc, char *argv[]) case 'F': format_name = pg_strdup(optarg); break; + + /* + * Note: support for --pipe is currently skipped for + * pg_dumpall due to the complexity of avoiding path + * collisions between multiple databases and coordinating + * nested directory structures. This could be considered as a + * future enhancement. + */ + case 'g': globals_only = true; break; diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c index cbac8bc2520..08a642d14b7 100644 --- a/src/bin/pg_dump/pg_restore.c +++ b/src/bin/pg_dump/pg_restore.c @@ -60,11 +60,11 @@ static void usage(const char *progname); static void read_restore_filters(const char *filename, RestoreOptions *opts); static bool file_exists_in_directory(const char *dir, const char *filename); static int restore_one_database(const char *inputFileSpec, RestoreOptions *opts, - int numWorkers, bool append_data, bool filespec_is_pipe); -static int restore_global_objects(const char *inputFileSpec, RestoreOptions *opts, bool filespec_is_pipe); + int numWorkers, bool append_data, bool is_pipe); +static int restore_global_objects(const char *inputFileSpec, RestoreOptions *opts, bool is_pipe); static int restore_all_databases(const char *inputFileSpec, - SimpleStringList db_exclude_patterns, RestoreOptions *opts, int numWorkers, bool filespec_is_pipe); + SimpleStringList db_exclude_patterns, RestoreOptions *opts, int numWorkers, bool is_pipe); static int get_dbnames_list_to_restore(PGconn *conn, SimplePtrList *dbname_oid_list, SimpleStringList db_exclude_patterns); @@ -87,13 +87,14 @@ main(int argc, char **argv) RestoreOptions *opts; int c; int numWorkers = 1; - char *inputFileSpec; + char *inputFileSpec = NULL; + char *pipe_command = NULL; bool data_only = false; bool schema_only = false; int n_errors = 0; bool globals_only = false; SimpleStringList db_exclude_patterns = {NULL, NULL}; - bool filespec_is_pipe = false; + bool is_pipe = false; static int disable_triggers = 0; static int enable_row_security = 0; static int if_exists = 0; @@ -174,7 +175,7 @@ main(int argc, char **argv) {"filter", required_argument, NULL, 4}, {"restrict-key", required_argument, NULL, 6}, {"exclude-database", required_argument, NULL, 7}, - {"pipe-command", required_argument, NULL, 8}, + {"pipe", required_argument, NULL, 8}, {NULL, 0, NULL, 0} }; @@ -358,9 +359,9 @@ main(int argc, char **argv) simple_string_list_append(&db_exclude_patterns, optarg); break; - case 8: /* pipe-command */ - inputFileSpec = pg_strdup(optarg); - filespec_is_pipe = true; + case 8: /* pipe */ + pipe_command = pg_strdup(optarg); + is_pipe = true; break; default: @@ -371,25 +372,21 @@ main(int argc, char **argv) } /* - * Get file name from command line. Note that filename argument and - * pipe-command can't both be set. + * Get file name from command line. Note that filename argument and pipe + * can't both be set. */ if (optind < argc) { - if (filespec_is_pipe) - { - pg_log_error_hint("Only one of [filespec, --pipe-command] allowed"); - exit_nicely(1); - } + if (is_pipe) + pg_fatal("cannot specify both an input file and --pipe"); inputFileSpec = argv[optind++]; } /* - * Even if the file argument is not provided, if the pipe-command is - * specified, we need to use that as the file arg and not fallback to - * stdio. + * Even if the file argument is not provided, if the pipe is specified, we + * need to use that as the file arg and not fallback to stdio. */ - else if (!filespec_is_pipe) + else if (!is_pipe) { inputFileSpec = NULL; } @@ -539,10 +536,20 @@ main(int argc, char **argv) pg_fatal("unrecognized archive format \"%s\"; please specify \"c\", \"d\", or \"t\"", opts->formatName); } + else + opts->format = archUnknown; + + if (is_pipe && opts->format != archDirectory) + pg_fatal("option --pipe is only supported with directory format"); /* * If toc.glo file is present, then restore all the databases from * map.dat, but skip restoring those matching --exclude-database patterns. + * + * Note: support for --pipe is currently skipped for cluster archives + * (archives containing toc.glo) due to the added complexity of handling + * nested directory paths and multiple databases. This could be considered + * as a future enhancement. */ if (inputFileSpec != NULL && (file_exists_in_directory(inputFileSpec, "toc.glo"))) @@ -619,7 +626,7 @@ main(int argc, char **argv) snprintf(global_path, MAXPGPATH, "%s/toc.glo", inputFileSpec); if (!no_globals) - n_errors = restore_global_objects(global_path, tmpopts, filespec_is_pipe); + n_errors = restore_global_objects(global_path, tmpopts, is_pipe); else pg_log_info("skipping restore of global objects because %s was specified", "--no-globals"); @@ -630,8 +637,8 @@ main(int argc, char **argv) else { /* Now restore all the databases from map.dat */ - n_errors = n_errors + restore_all_databases(inputFileSpec, db_exclude_patterns, - opts, numWorkers, filespec_is_pipe); + n_errors = n_errors + restore_all_databases(is_pipe ? pipe_command : inputFileSpec, db_exclude_patterns, + opts, numWorkers, is_pipe); } /* Free db pattern list. */ @@ -651,7 +658,7 @@ main(int argc, char **argv) "-g/--globals-only"); /* Process if toc.glo file does not exist. */ - n_errors = restore_one_database(inputFileSpec, opts, numWorkers, false, filespec_is_pipe); + n_errors = restore_one_database(is_pipe ? pipe_command : inputFileSpec, opts, numWorkers, false, is_pipe); } /* Done, print a summary of ignored errors during restore. */ @@ -670,7 +677,7 @@ main(int argc, char **argv) * This restore all global objects. */ static int -restore_global_objects(const char *inputFileSpec, RestoreOptions *opts, bool filespec_is_pipe) +restore_global_objects(const char *inputFileSpec, RestoreOptions *opts, bool is_pipe) { Archive *AH; int nerror = 0; @@ -679,7 +686,7 @@ restore_global_objects(const char *inputFileSpec, RestoreOptions *opts, bool fil opts->format = archCustom; opts->txn_size = 0; - AH = OpenArchive(inputFileSpec, opts->format, filespec_is_pipe); + AH = OpenArchive(inputFileSpec, opts->format, is_pipe); SetArchiveOptions(AH, NULL, opts); @@ -716,12 +723,12 @@ restore_global_objects(const char *inputFileSpec, RestoreOptions *opts, bool fil */ static int restore_one_database(const char *inputFileSpec, RestoreOptions *opts, - int numWorkers, bool append_data, bool filespec_is_pipe) + int numWorkers, bool append_data, bool is_pipe) { Archive *AH; int n_errors; - AH = OpenArchive(inputFileSpec, opts->format, filespec_is_pipe); + AH = OpenArchive(inputFileSpec, opts->format, is_pipe); SetArchiveOptions(AH, NULL, opts); @@ -777,6 +784,8 @@ usage(const char *progname) printf(_("\nGeneral options:\n")); printf(_(" -d, --dbname=NAME connect to database name\n")); printf(_(" -f, --file=FILENAME output file name (- for stdout)\n")); + printf(_(" --pipe=COMMAND execute command for each input file and\n" + " read data from it via pipe\n")); printf(_(" -F, --format=c|d|t backup file format (should be automatic)\n")); printf(_(" -l, --list print summarized TOC of the archive\n")); printf(_(" -v, --verbose verbose mode\n")); @@ -1170,7 +1179,7 @@ get_dbname_oid_list_from_mfile(const char *dumpdirpath, static int restore_all_databases(const char *inputFileSpec, SimpleStringList db_exclude_patterns, RestoreOptions *opts, - int numWorkers, bool filespec_is_pipe) + int numWorkers, bool is_pipe) { SimplePtrList dbname_oid_list = {NULL, NULL}; int num_db_restore = 0; @@ -1334,7 +1343,7 @@ restore_all_databases(const char *inputFileSpec, } /* Restore the single database. */ - n_errors = restore_one_database(subdirpath, tmpopts, numWorkers, true, filespec_is_pipe); + n_errors = restore_one_database(subdirpath, tmpopts, numWorkers, true, is_pipe); n_errors_total += n_errors; -- 2.54.0.545.g6539524ca2-goog