Re: pg_restore direct to database is broken for --insert dumps - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Re: pg_restore direct to database is broken for --insert dumps |
Date | |
Msg-id | 24669.1325808083@sss.pgh.pa.us Whole thread Raw |
In response to | Re: pg_restore direct to database is broken for --insert dumps (Tom Lane <tgl@sss.pgh.pa.us>) |
List | pgsql-hackers |
I wrote: > Not easily: there could be newlines embedded in data strings or SQL > identifiers. I'm not seeing any way around this except to restore the > minimal lexing capability. One thing we could probably do is to > restrict it to be used only when reading table data, and continue to > assume that object creation commands can be emitted as-is. That would > at least get us out of needing to parse dollar-quoted literals, which > aren't used in the INSERT commands. Attached is a patch that restores a much-simplified version of the mini lexer; it deals only with the sorts of things that dumpTableData_insert actually emits. Fixing the standard_conforming_strings issue turns out to be really a one-liner, because the setting is in fact readily available to this code. Maybe we should have done it that way to begin with :-( ... though I admit to being glad to have gotten rid of the very questionable dollar-quote-recognition code that was there before. Barring objections to the approach, I'll apply and back-patch this tomorrow. regards, tom lane diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index d9edebb0f48308787bf263cd90504d3c5ed80e8b..234e50fb734573f9f3287d952821702a0838c4fd 100644 *** a/src/bin/pg_dump/pg_backup_archiver.c --- b/src/bin/pg_dump/pg_backup_archiver.c *************** restore_toc_entry(ArchiveHandle *AH, Toc *** 620,639 **** if (te->copyStmt && strlen(te->copyStmt) > 0) { ahprintf(AH, "%s", te->copyStmt); ! AH->writingCopyData = true; } (*AH->PrintTocDataPtr) (AH, te, ropt); /* * Terminate COPY if needed. */ ! if (AH->writingCopyData) ! { ! if (RestoringToDB(AH)) ! EndDBCopyMode(AH, te); ! AH->writingCopyData = false; ! } /* close out the transaction started above */ if (is_parallel && te->created) --- 620,639 ---- if (te->copyStmt && strlen(te->copyStmt) > 0) { ahprintf(AH, "%s", te->copyStmt); ! AH->outputKind = OUTPUT_COPYDATA; } + else + AH->outputKind = OUTPUT_OTHERDATA; (*AH->PrintTocDataPtr) (AH, te, ropt); /* * Terminate COPY if needed. */ ! if (AH->outputKind == OUTPUT_COPYDATA && ! RestoringToDB(AH)) ! EndDBCopyMode(AH, te); ! AH->outputKind = OUTPUT_SQLCMDS; /* close out the transaction started above */ if (is_parallel && te->created) *************** _allocAH(const char *FileSpec, const Arc *** 1975,1980 **** --- 1975,1982 ---- AH->mode = mode; AH->compression = compression; + memset(&(AH->sqlparse), 0, sizeof(AH->sqlparse)); + /* Open stdout with no compression for AH output handle */ AH->gzOut = 0; AH->OF = stdout; *************** CloneArchive(ArchiveHandle *AH) *** 4194,4200 **** clone = (ArchiveHandle *) pg_malloc(sizeof(ArchiveHandle)); memcpy(clone, AH, sizeof(ArchiveHandle)); ! /* Handle format-independent fields ... none at the moment */ /* The clone will have its own connection, so disregard connection state */ clone->connection = NULL; --- 4196,4203 ---- clone = (ArchiveHandle *) pg_malloc(sizeof(ArchiveHandle)); memcpy(clone, AH, sizeof(ArchiveHandle)); ! /* Handle format-independent fields */ ! memset(&(clone->sqlparse), 0, sizeof(clone->sqlparse)); /* The clone will have its own connection, so disregard connection state */ clone->connection = NULL; *************** DeCloneArchive(ArchiveHandle *AH) *** 4227,4233 **** /* Clear format-specific state */ (AH->DeClonePtr) (AH); ! /* Clear state allocated by CloneArchive ... none at the moment */ /* Clear any connection-local state */ if (AH->currUser) --- 4230,4238 ---- /* Clear format-specific state */ (AH->DeClonePtr) (AH); ! /* Clear state allocated by CloneArchive */ ! if (AH->sqlparse.curCmd) ! destroyPQExpBuffer(AH->sqlparse.curCmd); /* Clear any connection-local state */ if (AH->currUser) diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h index 7a4fd360737ab3c48fdd8776879f804e0a1062a4..6dd5158ab4d9f7ee815e555029839190f335d9a1 100644 *** a/src/bin/pg_dump/pg_backup_archiver.h --- b/src/bin/pg_dump/pg_backup_archiver.h *************** typedef size_t (*CustomOutPtr) (struct _ *** 134,139 **** --- 134,153 ---- typedef enum { + SQL_SCAN = 0, /* normal */ + SQL_IN_SINGLE_QUOTE, /* '...' literal */ + SQL_IN_DOUBLE_QUOTE /* "..." identifier */ + } sqlparseState; + + typedef struct + { + sqlparseState state; /* see above */ + bool backSlash; /* next char is backslash quoted? */ + PQExpBuffer curCmd; /* incomplete line (NULL if not created) */ + } sqlparseInfo; + + typedef enum + { STAGE_NONE = 0, STAGE_INITIALIZING, STAGE_PROCESSING, *************** typedef enum *** 142,147 **** --- 156,168 ---- typedef enum { + OUTPUT_SQLCMDS = 0, /* emitting general SQL commands */ + OUTPUT_COPYDATA, /* writing COPY data */ + OUTPUT_OTHERDATA /* writing data as INSERT commands */ + } ArchiverOutput; + + typedef enum + { REQ_SCHEMA = 1, REQ_DATA = 2, REQ_ALL = REQ_SCHEMA + REQ_DATA *************** typedef struct _archiveHandle *** 167,172 **** --- 188,195 ---- * Added V1.7 */ ArchiveFormat format; /* Archive format */ + sqlparseInfo sqlparse; /* state for parsing INSERT data */ + time_t createDate; /* Date archive created */ /* *************** typedef struct _archiveHandle *** 217,223 **** PGconn *connection; int connectToDB; /* Flag to indicate if direct DB connection is * required */ ! bool writingCopyData; /* True when we are sending COPY data */ bool pgCopyIn; /* Currently in libpq 'COPY IN' mode. */ int loFd; /* BLOB fd */ --- 240,246 ---- PGconn *connection; int connectToDB; /* Flag to indicate if direct DB connection is * required */ ! ArchiverOutput outputKind; /* Flag for what we're currently writing */ bool pgCopyIn; /* Currently in libpq 'COPY IN' mode. */ int loFd; /* BLOB fd */ diff --git a/src/bin/pg_dump/pg_backup_db.c b/src/bin/pg_dump/pg_backup_db.c index bd1b8efac8285b4df741b4b814b889a0f6c1d124..62c8b3356c6f5e8c164ba4bc36dcbb16072d5ca2 100644 *** a/src/bin/pg_dump/pg_backup_db.c --- b/src/bin/pg_dump/pg_backup_db.c *************** ExecuteSqlCommand(ArchiveHandle *AH, con *** 365,378 **** /* * Implement ahwrite() for direct-to-DB restore */ int ExecuteSqlCommandBuf(ArchiveHandle *AH, const char *buf, size_t bufLen) { ! if (AH->writingCopyData) { /* * We drop the data on the floor if libpq has failed to enter COPY * mode; this allows us to behave reasonably when trying to continue * after an error in a COPY command. --- 365,456 ---- /* + * Process non-COPY table data (that is, INSERT commands). + * + * The commands have been run together as one long string for compressibility, + * and we are receiving them in bufferloads with arbitrary boundaries, so we + * have to locate command boundaries and save partial commands across calls. + * All state must be kept in AH->sqlparse, not in local variables of this + * routine. We assume that AH->sqlparse was filled with zeroes when created. + * + * We have to lex the data to the extent of identifying literals and quoted + * identifiers, so that we can recognize statement-terminating semicolons. + * We assume that INSERT data will not contain SQL comments, E'' literals, + * or dollar-quoted strings, so this is much simpler than a full SQL lexer. + */ + static void + ExecuteInsertCommands(ArchiveHandle *AH, const char *buf, size_t bufLen) + { + const char *qry = buf; + const char *eos = buf + bufLen; + + /* initialize command buffer if first time through */ + if (AH->sqlparse.curCmd == NULL) + AH->sqlparse.curCmd = createPQExpBuffer(); + + for (; qry < eos; qry++) + { + char ch = *qry; + + /* For neatness, we skip any newlines between commands */ + if (!(ch == '\n' && AH->sqlparse.curCmd->len == 0)) + appendPQExpBufferChar(AH->sqlparse.curCmd, ch); + + switch (AH->sqlparse.state) + { + case SQL_SCAN: /* Default state == 0, set in _allocAH */ + if (ch == ';') + { + /* + * We've found the end of a statement. Send it and reset + * the buffer. + */ + ExecuteSqlCommand(AH, AH->sqlparse.curCmd->data, + "could not execute query"); + resetPQExpBuffer(AH->sqlparse.curCmd); + } + else if (ch == '\'') + { + AH->sqlparse.state = SQL_IN_SINGLE_QUOTE; + AH->sqlparse.backSlash = false; + } + else if (ch == '"') + { + AH->sqlparse.state = SQL_IN_DOUBLE_QUOTE; + } + break; + + case SQL_IN_SINGLE_QUOTE: + /* We needn't handle '' specially */ + if (ch == '\'' && !AH->sqlparse.backSlash) + AH->sqlparse.state = SQL_SCAN; + else if (ch == '\\' && !AH->public.std_strings) + AH->sqlparse.backSlash = !AH->sqlparse.backSlash; + else + AH->sqlparse.backSlash = false; + break; + + case SQL_IN_DOUBLE_QUOTE: + /* We needn't handle "" specially */ + if (ch == '"') + AH->sqlparse.state = SQL_SCAN; + break; + } + } + } + + + /* * Implement ahwrite() for direct-to-DB restore */ int ExecuteSqlCommandBuf(ArchiveHandle *AH, const char *buf, size_t bufLen) { ! if (AH->outputKind == OUTPUT_COPYDATA) { /* + * COPY data. + * * We drop the data on the floor if libpq has failed to enter COPY * mode; this allows us to behave reasonably when trying to continue * after an error in a COPY command. *************** ExecuteSqlCommandBuf(ArchiveHandle *AH, *** 382,390 **** --- 460,478 ---- die_horribly(AH, modulename, "error returned by PQputCopyData: %s", PQerrorMessage(AH->connection)); } + else if (AH->outputKind == OUTPUT_OTHERDATA) + { + /* + * Table data expressed as INSERT commands. + */ + ExecuteInsertCommands(AH, buf, bufLen); + } else { /* + * General SQL commands; we assume that commands will not be split + * across calls. + * * In most cases the data passed to us will be a null-terminated * string, but if it's not, we have to add a trailing null. */ diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 8db4071684780ca5c34f3089fbb02564e220f27e..d1598ea4e98f8db05a39b0de42c9bc3465bb8353 100644 *** a/src/bin/pg_dump/pg_dump.c --- b/src/bin/pg_dump/pg_dump.c *************** dumpTableData_copy(Archive *fout, void * *** 1399,1404 **** --- 1399,1412 ---- return 1; } + /* + * Dump table data using INSERT commands. + * + * Caution: when we restore from an archive file direct to database, the + * INSERT commands emitted by this function have to be parsed by + * pg_backup_db.c's ExecuteInsertCommands(), which will not handle comments, + * E'' strings, or dollar-quoted strings. So don't emit anything like that. + */ static int dumpTableData_insert(Archive *fout, void *dcontext) {
pgsql-hackers by date: