[PATCH 4/6] pg_basebackup: don't lose a zero-length file at the end of a tablespace. - Mailing list pgsql-hackers

From Joshua Elsasser
Subject [PATCH 4/6] pg_basebackup: don't lose a zero-length file at the end of a tablespace.
Date
Msg-id 1443564988-17928-5-git-send-email-josh@idealist.org
Whole thread Raw
In response to Re: Add pg_basebackup single tar output format  (Joshua Elsasser <josh@idealist.org>)
List pgsql-hackers
After a complete tar header was buffered it would only be processed
during the next iteration of the read loop. A zero-length file such as
a directory had no data to read, so the loop would exit without ever
having processed the file.
---src/bin/pg_basebackup/pg_basebackup.c | 238 +++++++++++++++++++---------------1 file changed, 134 insertions(+), 104
deletions(-)

diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index f73dd38..ccd0890 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -60,6 +60,15 @@ typedef struct TarStream {    bool         keepopen;} TarStream;
+typedef struct TarParser {
+    char        tarhdr[512];
+    TarStream    *stream;
+    bool        in_tarhdr;
+    bool        skip_file;
+    size_t        tarhdrsz;
+    size_t        filesz;
+} TarParser;
+/* Global options */static char *basedir = NULL;static TablespaceList tablespace_dirs = {NULL, NULL};
@@ -112,6 +121,10 @@ static void progress_report(int tablespacenum, const char *filename, bool force)static void
OpenTarFile(TarStream*tarfile, const char *path);static void CloseTarFile(TarStream *tarfile);static void
TarInsertRecoveryConf(TarStream*stream);
 
+static void IterateAndWriteTar(TarParser *tp, char *inbuf, int buflen,
+                               bool (*callback)(char *, void *), void *cbarg);
+static bool TarIterSkipRecoveryConf(char *h, void *arg);
+static void ReceiveTarFile(PGconn *conn, PGresult *res, int rownum);static void ReceiveAndUnpackTarFile(PGconn *conn,
PGresult*res, int rownum);static void GenerateRecoveryConf(PGconn *conn);
 
@@ -899,6 +912,120 @@ TarInsertRecoveryConf(TarStream *stream)/*
+ * Process the individual files inside the TAR stream and pass their headers
+ * to a callback which can modify or chose to skip them. The stream consists
+ * of a header and zero or more chunks, all 512 bytes long. The stream from
+ * the server is broken up into smaller pieces, so we have to track the size
+ * of the files to find the next header structure.
+ */
+static void
+IterateAndWriteTar(TarParser *tp, char *inbuf, int buflen,
+                   bool (*callback)(char *, void *), void *cbarg)
+{
+    int            rr = buflen;
+    int            pos = 0;
+
+    while (rr > 0)
+    {
+        if (tp->in_tarhdr)
+        {
+            /*
+             * We're currently reading a header structure inside the
+             * TAR stream, i.e. the file metadata.
+             */
+            if (tp->tarhdrsz < 512)
+            {
+                /*
+                 * Copy the header structure into tarhdr in case the
+                 * header is not aligned to 512 bytes or it's not
+                 * returned in whole by the last PQgetCopyData call.
+                 */
+                int            hdrleft;
+                int            bytes2copy;
+
+                hdrleft = 512 - tp->tarhdrsz;
+                bytes2copy = (rr > hdrleft ? hdrleft : rr);
+
+                memcpy(&tp->tarhdr[tp->tarhdrsz], inbuf + pos, bytes2copy);
+
+                rr -= bytes2copy;
+                pos += bytes2copy;
+                tp->tarhdrsz += bytes2copy;
+            }
+
+            if (tp->tarhdrsz == 512)
+            {
+                /*
+                 * We have the complete header structure in tarhdr, let the
+                 * callback possibly modify it or chose to skip the file. Find
+                 * out the size of the file padded to the next multiple of 512
+                 */
+                int            padding;
+
+                tp->skip_file = callback(tp->tarhdr, cbarg);
+
+                sscanf(&tp->tarhdr[124], "%11o", (unsigned int *) &tp->filesz);
+
+                padding = ((tp->filesz + 511) & ~511) - tp->filesz;
+                tp->filesz += padding;
+
+                /* Next part is the file, not the header */
+                tp->in_tarhdr = false;
+
+                /*
+                 * If we're not skipping the file, write the tar
+                 * header unmodified.
+                 */
+                if (!tp->skip_file)
+                    writeTarData(tp->stream, tp->tarhdr, 512);
+            }
+        }
+        else
+        {
+            /*
+             * We're processing a file's contents.
+             */
+            if (tp->filesz > 0)
+            {
+                /*
+                 * We still have data to read (and possibly write).
+                 */
+                int            bytes2write;
+
+                bytes2write = (tp->filesz > rr ? rr : tp->filesz);
+
+                if (!tp->skip_file)
+                    writeTarData(tp->stream, inbuf + pos, bytes2write);
+
+                rr -= bytes2write;
+                pos += bytes2write;
+                tp->filesz -= bytes2write;
+            }
+            else
+            {
+                /*
+                 * No more data in the current file, the next piece of
+                 * data (if any) will be a new file header structure.
+                 */
+                tp->in_tarhdr = true;
+                tp->skip_file = false;
+                tp->tarhdrsz = 0;
+                tp->filesz = 0;
+            }
+        }
+    }
+}
+
+
+static bool
+TarIterSkipRecoveryConf(char *h, void *arg)
+{
+    /* Skip the file if the name is recovery.conf */
+    return (strcmp(&h[0], "recovery.conf") == 0);
+}
+
+
+/* * Open a (possibly zlib-compressed) tar file for writing. The filebase * argument should be the desired filename
relativeto basedir, without a .tar * or .tar.gz file extension. If the user specified a basedir of - then stdout
 
@@ -936,12 +1063,12 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum){    TarStream    stream;    char
*copybuf= NULL;
 
-    char        tarhdr[512];    bool        basetablespace = PQgetisnull(res, rownum, 0);
-    bool        in_tarhdr = true;
-    bool        skip_file = false;
-    size_t        tarhdrsz = 0;
-    size_t        filesz = 0;
+    TarParser    parser;
+
+    MemSet(&parser, 0, sizeof(parser));
+    parser.stream = &stream;
+    parser.in_tarhdr = true;    if (basetablespace)        /* Base tablespace */
@@ -1010,106 +1137,9 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)             * Look for a recovery.conf
inthe existing tar stream. If it's             * there, we must skip it so we can later overwrite it with our
 * own version of the file.
 
-             *
-             * To do this, we have to process the individual files inside the
-             * TAR stream. The stream consists of a header and zero or more
-             * chunks, all 512 bytes long. The stream from the server is
-             * broken up into smaller pieces, so we have to track the size of
-             * the files to find the next header structure.             */
-            int            rr = r;
-            int            pos = 0;
-
-            while (rr > 0)
-            {
-                if (in_tarhdr)
-                {
-                    /*
-                     * We're currently reading a header structure inside the
-                     * TAR stream, i.e. the file metadata.
-                     */
-                    if (tarhdrsz < 512)
-                    {
-                        /*
-                         * Copy the header structure into tarhdr in case the
-                         * header is not aligned to 512 bytes or it's not
-                         * returned in whole by the last PQgetCopyData call.
-                         */
-                        int            hdrleft;
-                        int            bytes2copy;
-
-                        hdrleft = 512 - tarhdrsz;
-                        bytes2copy = (rr > hdrleft ? hdrleft : rr);
-
-                        memcpy(&tarhdr[tarhdrsz], copybuf + pos, bytes2copy);
-
-                        rr -= bytes2copy;
-                        pos += bytes2copy;
-                        tarhdrsz += bytes2copy;
-                    }
-                    else
-                    {
-                        /*
-                         * We have the complete header structure in tarhdr,
-                         * look at the file metadata: - the subsequent file
-                         * contents have to be skipped if the filename is
-                         * recovery.conf - find out the size of the file
-                         * padded to the next multiple of 512
-                         */
-                        int            padding;
-
-                        skip_file = (strcmp(&tarhdr[0], "recovery.conf") == 0);
-
-                        sscanf(&tarhdr[124], "%11o", (unsigned int *) &filesz);
-
-                        padding = ((filesz + 511) & ~511) - filesz;
-                        filesz += padding;
-
-                        /* Next part is the file, not the header */
-                        in_tarhdr = false;
-
-                        /*
-                         * If we're not skipping the file, write the tar
-                         * header unmodified.
-                         */
-                        if (!skip_file)
-                            writeTarData(&stream, tarhdr, 512);
-                    }
-                }
-                else
-                {
-                    /*
-                     * We're processing a file's contents.
-                     */
-                    if (filesz > 0)
-                    {
-                        /*
-                         * We still have data to read (and possibly write).
-                         */
-                        int            bytes2write;
-
-                        bytes2write = (filesz > rr ? rr : filesz);
-
-                        if (!skip_file)
-                            writeTarData(&stream, copybuf + pos, bytes2write);
-
-                        rr -= bytes2write;
-                        pos += bytes2write;
-                        filesz -= bytes2write;
-                    }
-                    else
-                    {
-                        /*
-                         * No more data in the current file, the next piece of
-                         * data (if any) will be a new file header structure.
-                         */
-                        in_tarhdr = true;
-                        skip_file = false;
-                        tarhdrsz = 0;
-                        filesz = 0;
-                    }
-                }
-            }
+            IterateAndWriteTar(&parser, copybuf, r,
+                               TarIterSkipRecoveryConf, NULL);        }        totaldone += r;
progress_report(rownum,stream.path, false);
 
-- 
2.3.0




pgsql-hackers by date:

Previous
From: Joshua Elsasser
Date:
Subject: [PATCH 5/6] pg_basebackup: allow GetConnection() to make non-replication connections.
Next
From: David Rowley
Date:
Subject: Re: PATCH: use foreign keys to improve join estimates v1