Re: pg_restore direct to database is broken for --insert dumps - Mailing list pgsql-hackers

From Tom Lane
Subject Re: pg_restore direct to database is broken for --insert dumps
Date
Msg-id 24669.1325808083@sss.pgh.pa.us
Whole thread Raw
In response to Re: pg_restore direct to database is broken for --insert dumps  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-hackers
I wrote:
> Not easily: there could be newlines embedded in data strings or SQL
> identifiers.  I'm not seeing any way around this except to restore the
> minimal lexing capability.  One thing we could probably do is to
> restrict it to be used only when reading table data, and continue to
> assume that object creation commands can be emitted as-is.  That would
> at least get us out of needing to parse dollar-quoted literals, which
> aren't used in the INSERT commands.

Attached is a patch that restores a much-simplified version of the mini
lexer; it deals only with the sorts of things that dumpTableData_insert
actually emits.  Fixing the standard_conforming_strings issue turns out
to be really a one-liner, because the setting is in fact readily
available to this code.  Maybe we should have done it that way to begin
with :-( ... though I admit to being glad to have gotten rid of the very
questionable dollar-quote-recognition code that was there before.

Barring objections to the approach, I'll apply and back-patch this
tomorrow.

            regards, tom lane

diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index d9edebb0f48308787bf263cd90504d3c5ed80e8b..234e50fb734573f9f3287d952821702a0838c4fd 100644
*** a/src/bin/pg_dump/pg_backup_archiver.c
--- b/src/bin/pg_dump/pg_backup_archiver.c
*************** restore_toc_entry(ArchiveHandle *AH, Toc
*** 620,639 ****
                      if (te->copyStmt && strlen(te->copyStmt) > 0)
                      {
                          ahprintf(AH, "%s", te->copyStmt);
!                         AH->writingCopyData = true;
                      }

                      (*AH->PrintTocDataPtr) (AH, te, ropt);

                      /*
                       * Terminate COPY if needed.
                       */
!                     if (AH->writingCopyData)
!                     {
!                         if (RestoringToDB(AH))
!                             EndDBCopyMode(AH, te);
!                         AH->writingCopyData = false;
!                     }

                      /* close out the transaction started above */
                      if (is_parallel && te->created)
--- 620,639 ----
                      if (te->copyStmt && strlen(te->copyStmt) > 0)
                      {
                          ahprintf(AH, "%s", te->copyStmt);
!                         AH->outputKind = OUTPUT_COPYDATA;
                      }
+                     else
+                         AH->outputKind = OUTPUT_OTHERDATA;

                      (*AH->PrintTocDataPtr) (AH, te, ropt);

                      /*
                       * Terminate COPY if needed.
                       */
!                     if (AH->outputKind == OUTPUT_COPYDATA &&
!                         RestoringToDB(AH))
!                         EndDBCopyMode(AH, te);
!                     AH->outputKind = OUTPUT_SQLCMDS;

                      /* close out the transaction started above */
                      if (is_parallel && te->created)
*************** _allocAH(const char *FileSpec, const Arc
*** 1975,1980 ****
--- 1975,1982 ----
      AH->mode = mode;
      AH->compression = compression;

+     memset(&(AH->sqlparse), 0, sizeof(AH->sqlparse));
+
      /* Open stdout with no compression for AH output handle */
      AH->gzOut = 0;
      AH->OF = stdout;
*************** CloneArchive(ArchiveHandle *AH)
*** 4194,4200 ****
      clone = (ArchiveHandle *) pg_malloc(sizeof(ArchiveHandle));
      memcpy(clone, AH, sizeof(ArchiveHandle));

!     /* Handle format-independent fields ... none at the moment */

      /* The clone will have its own connection, so disregard connection state */
      clone->connection = NULL;
--- 4196,4203 ----
      clone = (ArchiveHandle *) pg_malloc(sizeof(ArchiveHandle));
      memcpy(clone, AH, sizeof(ArchiveHandle));

!     /* Handle format-independent fields */
!     memset(&(clone->sqlparse), 0, sizeof(clone->sqlparse));

      /* The clone will have its own connection, so disregard connection state */
      clone->connection = NULL;
*************** DeCloneArchive(ArchiveHandle *AH)
*** 4227,4233 ****
      /* Clear format-specific state */
      (AH->DeClonePtr) (AH);

!     /* Clear state allocated by CloneArchive ... none at the moment */

      /* Clear any connection-local state */
      if (AH->currUser)
--- 4230,4238 ----
      /* Clear format-specific state */
      (AH->DeClonePtr) (AH);

!     /* Clear state allocated by CloneArchive */
!     if (AH->sqlparse.curCmd)
!         destroyPQExpBuffer(AH->sqlparse.curCmd);

      /* Clear any connection-local state */
      if (AH->currUser)
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index 7a4fd360737ab3c48fdd8776879f804e0a1062a4..6dd5158ab4d9f7ee815e555029839190f335d9a1 100644
*** a/src/bin/pg_dump/pg_backup_archiver.h
--- b/src/bin/pg_dump/pg_backup_archiver.h
*************** typedef size_t (*CustomOutPtr) (struct _
*** 134,139 ****
--- 134,153 ----

  typedef enum
  {
+     SQL_SCAN = 0,                /* normal */
+     SQL_IN_SINGLE_QUOTE,        /* '...' literal */
+     SQL_IN_DOUBLE_QUOTE            /* "..." identifier */
+ } sqlparseState;
+
+ typedef struct
+ {
+     sqlparseState state;        /* see above */
+     bool        backSlash;        /* next char is backslash quoted? */
+     PQExpBuffer curCmd;            /* incomplete line (NULL if not created) */
+ } sqlparseInfo;
+
+ typedef enum
+ {
      STAGE_NONE = 0,
      STAGE_INITIALIZING,
      STAGE_PROCESSING,
*************** typedef enum
*** 142,147 ****
--- 156,168 ----

  typedef enum
  {
+     OUTPUT_SQLCMDS = 0,            /* emitting general SQL commands */
+     OUTPUT_COPYDATA,            /* writing COPY data */
+     OUTPUT_OTHERDATA            /* writing data as INSERT commands */
+ } ArchiverOutput;
+
+ typedef enum
+ {
      REQ_SCHEMA = 1,
      REQ_DATA = 2,
      REQ_ALL = REQ_SCHEMA + REQ_DATA
*************** typedef struct _archiveHandle
*** 167,172 ****
--- 188,195 ----
                                   * Added V1.7 */
      ArchiveFormat format;        /* Archive format */

+     sqlparseInfo sqlparse;        /* state for parsing INSERT data */
+
      time_t        createDate;        /* Date archive created */

      /*
*************** typedef struct _archiveHandle
*** 217,223 ****
      PGconn       *connection;
      int            connectToDB;    /* Flag to indicate if direct DB connection is
                                   * required */
!     bool        writingCopyData;    /* True when we are sending COPY data */
      bool        pgCopyIn;        /* Currently in libpq 'COPY IN' mode. */

      int            loFd;            /* BLOB fd */
--- 240,246 ----
      PGconn       *connection;
      int            connectToDB;    /* Flag to indicate if direct DB connection is
                                   * required */
!     ArchiverOutput outputKind;    /* Flag for what we're currently writing */
      bool        pgCopyIn;        /* Currently in libpq 'COPY IN' mode. */

      int            loFd;            /* BLOB fd */
diff --git a/src/bin/pg_dump/pg_backup_db.c b/src/bin/pg_dump/pg_backup_db.c
index bd1b8efac8285b4df741b4b814b889a0f6c1d124..62c8b3356c6f5e8c164ba4bc36dcbb16072d5ca2 100644
*** a/src/bin/pg_dump/pg_backup_db.c
--- b/src/bin/pg_dump/pg_backup_db.c
*************** ExecuteSqlCommand(ArchiveHandle *AH, con
*** 365,378 ****


  /*
   * Implement ahwrite() for direct-to-DB restore
   */
  int
  ExecuteSqlCommandBuf(ArchiveHandle *AH, const char *buf, size_t bufLen)
  {
!     if (AH->writingCopyData)
      {
          /*
           * We drop the data on the floor if libpq has failed to enter COPY
           * mode; this allows us to behave reasonably when trying to continue
           * after an error in a COPY command.
--- 365,456 ----


  /*
+  * Process non-COPY table data (that is, INSERT commands).
+  *
+  * The commands have been run together as one long string for compressibility,
+  * and we are receiving them in bufferloads with arbitrary boundaries, so we
+  * have to locate command boundaries and save partial commands across calls.
+  * All state must be kept in AH->sqlparse, not in local variables of this
+  * routine.  We assume that AH->sqlparse was filled with zeroes when created.
+  *
+  * We have to lex the data to the extent of identifying literals and quoted
+  * identifiers, so that we can recognize statement-terminating semicolons.
+  * We assume that INSERT data will not contain SQL comments, E'' literals,
+  * or dollar-quoted strings, so this is much simpler than a full SQL lexer.
+  */
+ static void
+ ExecuteInsertCommands(ArchiveHandle *AH, const char *buf, size_t bufLen)
+ {
+     const char *qry = buf;
+     const char *eos = buf + bufLen;
+
+     /* initialize command buffer if first time through */
+     if (AH->sqlparse.curCmd == NULL)
+         AH->sqlparse.curCmd = createPQExpBuffer();
+
+     for (; qry < eos; qry++)
+     {
+         char    ch = *qry;
+
+         /* For neatness, we skip any newlines between commands */
+         if (!(ch == '\n' && AH->sqlparse.curCmd->len == 0))
+             appendPQExpBufferChar(AH->sqlparse.curCmd, ch);
+
+         switch (AH->sqlparse.state)
+         {
+             case SQL_SCAN:        /* Default state == 0, set in _allocAH */
+                 if (ch == ';')
+                 {
+                     /*
+                      * We've found the end of a statement. Send it and reset
+                      * the buffer.
+                      */
+                     ExecuteSqlCommand(AH, AH->sqlparse.curCmd->data,
+                                       "could not execute query");
+                     resetPQExpBuffer(AH->sqlparse.curCmd);
+                 }
+                 else if (ch == '\'')
+                 {
+                     AH->sqlparse.state = SQL_IN_SINGLE_QUOTE;
+                     AH->sqlparse.backSlash = false;
+                 }
+                 else if (ch == '"')
+                 {
+                     AH->sqlparse.state = SQL_IN_DOUBLE_QUOTE;
+                 }
+                 break;
+
+             case SQL_IN_SINGLE_QUOTE:
+                 /* We needn't handle '' specially */
+                 if (ch == '\'' && !AH->sqlparse.backSlash)
+                     AH->sqlparse.state = SQL_SCAN;
+                 else if (ch == '\\' && !AH->public.std_strings)
+                     AH->sqlparse.backSlash = !AH->sqlparse.backSlash;
+                 else
+                     AH->sqlparse.backSlash = false;
+                 break;
+
+             case SQL_IN_DOUBLE_QUOTE:
+                 /* We needn't handle "" specially */
+                 if (ch == '"')
+                     AH->sqlparse.state = SQL_SCAN;
+                 break;
+         }
+     }
+ }
+
+
+ /*
   * Implement ahwrite() for direct-to-DB restore
   */
  int
  ExecuteSqlCommandBuf(ArchiveHandle *AH, const char *buf, size_t bufLen)
  {
!     if (AH->outputKind == OUTPUT_COPYDATA)
      {
          /*
+          * COPY data.
+          *
           * We drop the data on the floor if libpq has failed to enter COPY
           * mode; this allows us to behave reasonably when trying to continue
           * after an error in a COPY command.
*************** ExecuteSqlCommandBuf(ArchiveHandle *AH,
*** 382,390 ****
--- 460,478 ----
              die_horribly(AH, modulename, "error returned by PQputCopyData: %s",
                           PQerrorMessage(AH->connection));
      }
+     else if (AH->outputKind == OUTPUT_OTHERDATA)
+     {
+         /*
+          * Table data expressed as INSERT commands.
+          */
+         ExecuteInsertCommands(AH, buf, bufLen);
+     }
      else
      {
          /*
+          * General SQL commands; we assume that commands will not be split
+          * across calls.
+          *
           * In most cases the data passed to us will be a null-terminated
           * string, but if it's not, we have to add a trailing null.
           */
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 8db4071684780ca5c34f3089fbb02564e220f27e..d1598ea4e98f8db05a39b0de42c9bc3465bb8353 100644
*** a/src/bin/pg_dump/pg_dump.c
--- b/src/bin/pg_dump/pg_dump.c
*************** dumpTableData_copy(Archive *fout, void *
*** 1399,1404 ****
--- 1399,1412 ----
      return 1;
  }

+ /*
+  * Dump table data using INSERT commands.
+  *
+  * Caution: when we restore from an archive file direct to database, the
+  * INSERT commands emitted by this function have to be parsed by
+  * pg_backup_db.c's ExecuteInsertCommands(), which will not handle comments,
+  * E'' strings, or dollar-quoted strings.  So don't emit anything like that.
+  */
  static int
  dumpTableData_insert(Archive *fout, void *dcontext)
  {

pgsql-hackers by date:

Previous
From: Peter Geoghegan
Date:
Subject: Re: Progress on fast path sorting, btree index creation time
Next
From: Josh Berkus
Date:
Subject: Re: random_page_cost vs seq_page_cost