I wrote:
> Not easily: there could be newlines embedded in data strings or SQL
> identifiers. I'm not seeing any way around this except to restore the
> minimal lexing capability. One thing we could probably do is to
> restrict it to be used only when reading table data, and continue to
> assume that object creation commands can be emitted as-is. That would
> at least get us out of needing to parse dollar-quoted literals, which
> aren't used in the INSERT commands.
Attached is a patch that restores a much-simplified version of the mini
lexer; it deals only with the sorts of things that dumpTableData_insert
actually emits. Fixing the standard_conforming_strings issue turns out
to be really a one-liner, because the setting is in fact readily
available to this code. Maybe we should have done it that way to begin
with :-( ... though I admit to being glad to have gotten rid of the very
questionable dollar-quote-recognition code that was there before.
Barring objections to the approach, I'll apply and back-patch this
tomorrow.
regards, tom lane
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index d9edebb0f48308787bf263cd90504d3c5ed80e8b..234e50fb734573f9f3287d952821702a0838c4fd 100644
*** a/src/bin/pg_dump/pg_backup_archiver.c
--- b/src/bin/pg_dump/pg_backup_archiver.c
*************** restore_toc_entry(ArchiveHandle *AH, Toc
*** 620,639 ****
if (te->copyStmt && strlen(te->copyStmt) > 0)
{
ahprintf(AH, "%s", te->copyStmt);
! AH->writingCopyData = true;
}
(*AH->PrintTocDataPtr) (AH, te, ropt);
/*
* Terminate COPY if needed.
*/
! if (AH->writingCopyData)
! {
! if (RestoringToDB(AH))
! EndDBCopyMode(AH, te);
! AH->writingCopyData = false;
! }
/* close out the transaction started above */
if (is_parallel && te->created)
--- 620,639 ----
if (te->copyStmt && strlen(te->copyStmt) > 0)
{
ahprintf(AH, "%s", te->copyStmt);
! AH->outputKind = OUTPUT_COPYDATA;
}
+ else
+ AH->outputKind = OUTPUT_OTHERDATA;
(*AH->PrintTocDataPtr) (AH, te, ropt);
/*
* Terminate COPY if needed.
*/
! if (AH->outputKind == OUTPUT_COPYDATA &&
! RestoringToDB(AH))
! EndDBCopyMode(AH, te);
! AH->outputKind = OUTPUT_SQLCMDS;
/* close out the transaction started above */
if (is_parallel && te->created)
*************** _allocAH(const char *FileSpec, const Arc
*** 1975,1980 ****
--- 1975,1982 ----
AH->mode = mode;
AH->compression = compression;
+ memset(&(AH->sqlparse), 0, sizeof(AH->sqlparse));
+
/* Open stdout with no compression for AH output handle */
AH->gzOut = 0;
AH->OF = stdout;
*************** CloneArchive(ArchiveHandle *AH)
*** 4194,4200 ****
clone = (ArchiveHandle *) pg_malloc(sizeof(ArchiveHandle));
memcpy(clone, AH, sizeof(ArchiveHandle));
! /* Handle format-independent fields ... none at the moment */
/* The clone will have its own connection, so disregard connection state */
clone->connection = NULL;
--- 4196,4203 ----
clone = (ArchiveHandle *) pg_malloc(sizeof(ArchiveHandle));
memcpy(clone, AH, sizeof(ArchiveHandle));
! /* Handle format-independent fields */
! memset(&(clone->sqlparse), 0, sizeof(clone->sqlparse));
/* The clone will have its own connection, so disregard connection state */
clone->connection = NULL;
*************** DeCloneArchive(ArchiveHandle *AH)
*** 4227,4233 ****
/* Clear format-specific state */
(AH->DeClonePtr) (AH);
! /* Clear state allocated by CloneArchive ... none at the moment */
/* Clear any connection-local state */
if (AH->currUser)
--- 4230,4238 ----
/* Clear format-specific state */
(AH->DeClonePtr) (AH);
! /* Clear state allocated by CloneArchive */
! if (AH->sqlparse.curCmd)
! destroyPQExpBuffer(AH->sqlparse.curCmd);
/* Clear any connection-local state */
if (AH->currUser)
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index 7a4fd360737ab3c48fdd8776879f804e0a1062a4..6dd5158ab4d9f7ee815e555029839190f335d9a1 100644
*** a/src/bin/pg_dump/pg_backup_archiver.h
--- b/src/bin/pg_dump/pg_backup_archiver.h
*************** typedef size_t (*CustomOutPtr) (struct _
*** 134,139 ****
--- 134,153 ----
typedef enum
{
+ SQL_SCAN = 0, /* normal */
+ SQL_IN_SINGLE_QUOTE, /* '...' literal */
+ SQL_IN_DOUBLE_QUOTE /* "..." identifier */
+ } sqlparseState;
+
+ typedef struct
+ {
+ sqlparseState state; /* see above */
+ bool backSlash; /* next char is backslash quoted? */
+ PQExpBuffer curCmd; /* incomplete line (NULL if not created) */
+ } sqlparseInfo;
+
+ typedef enum
+ {
STAGE_NONE = 0,
STAGE_INITIALIZING,
STAGE_PROCESSING,
*************** typedef enum
*** 142,147 ****
--- 156,168 ----
typedef enum
{
+ OUTPUT_SQLCMDS = 0, /* emitting general SQL commands */
+ OUTPUT_COPYDATA, /* writing COPY data */
+ OUTPUT_OTHERDATA /* writing data as INSERT commands */
+ } ArchiverOutput;
+
+ typedef enum
+ {
REQ_SCHEMA = 1,
REQ_DATA = 2,
REQ_ALL = REQ_SCHEMA + REQ_DATA
*************** typedef struct _archiveHandle
*** 167,172 ****
--- 188,195 ----
* Added V1.7 */
ArchiveFormat format; /* Archive format */
+ sqlparseInfo sqlparse; /* state for parsing INSERT data */
+
time_t createDate; /* Date archive created */
/*
*************** typedef struct _archiveHandle
*** 217,223 ****
PGconn *connection;
int connectToDB; /* Flag to indicate if direct DB connection is
* required */
! bool writingCopyData; /* True when we are sending COPY data */
bool pgCopyIn; /* Currently in libpq 'COPY IN' mode. */
int loFd; /* BLOB fd */
--- 240,246 ----
PGconn *connection;
int connectToDB; /* Flag to indicate if direct DB connection is
* required */
! ArchiverOutput outputKind; /* Flag for what we're currently writing */
bool pgCopyIn; /* Currently in libpq 'COPY IN' mode. */
int loFd; /* BLOB fd */
diff --git a/src/bin/pg_dump/pg_backup_db.c b/src/bin/pg_dump/pg_backup_db.c
index bd1b8efac8285b4df741b4b814b889a0f6c1d124..62c8b3356c6f5e8c164ba4bc36dcbb16072d5ca2 100644
*** a/src/bin/pg_dump/pg_backup_db.c
--- b/src/bin/pg_dump/pg_backup_db.c
*************** ExecuteSqlCommand(ArchiveHandle *AH, con
*** 365,378 ****
/*
* Implement ahwrite() for direct-to-DB restore
*/
int
ExecuteSqlCommandBuf(ArchiveHandle *AH, const char *buf, size_t bufLen)
{
! if (AH->writingCopyData)
{
/*
* We drop the data on the floor if libpq has failed to enter COPY
* mode; this allows us to behave reasonably when trying to continue
* after an error in a COPY command.
--- 365,456 ----
/*
+ * Process non-COPY table data (that is, INSERT commands).
+ *
+ * The commands have been run together as one long string for compressibility,
+ * and we are receiving them in bufferloads with arbitrary boundaries, so we
+ * have to locate command boundaries and save partial commands across calls.
+ * All state must be kept in AH->sqlparse, not in local variables of this
+ * routine. We assume that AH->sqlparse was filled with zeroes when created.
+ *
+ * We have to lex the data to the extent of identifying literals and quoted
+ * identifiers, so that we can recognize statement-terminating semicolons.
+ * We assume that INSERT data will not contain SQL comments, E'' literals,
+ * or dollar-quoted strings, so this is much simpler than a full SQL lexer.
+ */
+ static void
+ ExecuteInsertCommands(ArchiveHandle *AH, const char *buf, size_t bufLen)
+ {
+ const char *qry = buf;
+ const char *eos = buf + bufLen;
+
+ /* initialize command buffer if first time through */
+ if (AH->sqlparse.curCmd == NULL)
+ AH->sqlparse.curCmd = createPQExpBuffer();
+
+ for (; qry < eos; qry++)
+ {
+ char ch = *qry;
+
+ /* For neatness, we skip any newlines between commands */
+ if (!(ch == '\n' && AH->sqlparse.curCmd->len == 0))
+ appendPQExpBufferChar(AH->sqlparse.curCmd, ch);
+
+ switch (AH->sqlparse.state)
+ {
+ case SQL_SCAN: /* Default state == 0, set in _allocAH */
+ if (ch == ';')
+ {
+ /*
+ * We've found the end of a statement. Send it and reset
+ * the buffer.
+ */
+ ExecuteSqlCommand(AH, AH->sqlparse.curCmd->data,
+ "could not execute query");
+ resetPQExpBuffer(AH->sqlparse.curCmd);
+ }
+ else if (ch == '\'')
+ {
+ AH->sqlparse.state = SQL_IN_SINGLE_QUOTE;
+ AH->sqlparse.backSlash = false;
+ }
+ else if (ch == '"')
+ {
+ AH->sqlparse.state = SQL_IN_DOUBLE_QUOTE;
+ }
+ break;
+
+ case SQL_IN_SINGLE_QUOTE:
+ /* We needn't handle '' specially */
+ if (ch == '\'' && !AH->sqlparse.backSlash)
+ AH->sqlparse.state = SQL_SCAN;
+ else if (ch == '\\' && !AH->public.std_strings)
+ AH->sqlparse.backSlash = !AH->sqlparse.backSlash;
+ else
+ AH->sqlparse.backSlash = false;
+ break;
+
+ case SQL_IN_DOUBLE_QUOTE:
+ /* We needn't handle "" specially */
+ if (ch == '"')
+ AH->sqlparse.state = SQL_SCAN;
+ break;
+ }
+ }
+ }
+
+
+ /*
* Implement ahwrite() for direct-to-DB restore
*/
int
ExecuteSqlCommandBuf(ArchiveHandle *AH, const char *buf, size_t bufLen)
{
! if (AH->outputKind == OUTPUT_COPYDATA)
{
/*
+ * COPY data.
+ *
* We drop the data on the floor if libpq has failed to enter COPY
* mode; this allows us to behave reasonably when trying to continue
* after an error in a COPY command.
*************** ExecuteSqlCommandBuf(ArchiveHandle *AH,
*** 382,390 ****
--- 460,478 ----
die_horribly(AH, modulename, "error returned by PQputCopyData: %s",
PQerrorMessage(AH->connection));
}
+ else if (AH->outputKind == OUTPUT_OTHERDATA)
+ {
+ /*
+ * Table data expressed as INSERT commands.
+ */
+ ExecuteInsertCommands(AH, buf, bufLen);
+ }
else
{
/*
+ * General SQL commands; we assume that commands will not be split
+ * across calls.
+ *
* In most cases the data passed to us will be a null-terminated
* string, but if it's not, we have to add a trailing null.
*/
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 8db4071684780ca5c34f3089fbb02564e220f27e..d1598ea4e98f8db05a39b0de42c9bc3465bb8353 100644
*** a/src/bin/pg_dump/pg_dump.c
--- b/src/bin/pg_dump/pg_dump.c
*************** dumpTableData_copy(Archive *fout, void *
*** 1399,1404 ****
--- 1399,1412 ----
return 1;
}
+ /*
+ * Dump table data using INSERT commands.
+ *
+ * Caution: when we restore from an archive file direct to database, the
+ * INSERT commands emitted by this function have to be parsed by
+ * pg_backup_db.c's ExecuteInsertCommands(), which will not handle comments,
+ * E'' strings, or dollar-quoted strings. So don't emit anything like that.
+ */
static int
dumpTableData_insert(Archive *fout, void *dcontext)
{