I needed a way to run diffs on two
database dumps to see what data developers put in their
local databases versus the initial database load. The
pg_dump utility with --inserts works well for this, but
since the order in which the data is returned of the server
is not guaranteed I hacked this patch to make life
simpler.
Using --ordered will order the data by primary key or
unique index, if one exists, and use the "smallest" ordering
(i.e. least number of columns required for a unique
order).
Note that --ordered could crush your database server if you
try to order very large tables, so use judiciously.
This is my first patch submission, so I hope I've followed
protocol. If not, please be gentle!
Regards,
Bob Lunney
bob_lunney dot yahoo dot com
diff -cNr src/bin/pg_dump/pg_dump.c.orig
src/bin/pg_dump/pg_dump.c
*** src/bin/pg_dump/pg_dump.c.orig 2010-04-06
11:21:48.000000000 -0400
--- src/bin/pg_dump/pg_dump.c 2010-04-15
10:28:49.000000000 -0400
***************
*** 111,116 ****
--- 111,117 ----
static int disable_dollar_quoting = 0;
static int dump_inserts = 0;
static int column_inserts = 0;
+ static int ordered = 0;
static void help(const char *progname);
***************
*** 275,280 ****
--- 276,282 ----
{"inserts", no_argument,
&dump_inserts, 1},
{"lock-wait-timeout",
required_argument, NULL, 2},
{"no-tablespaces", no_argument,
&outputNoTablespaces, 1},
+ {"ordered", no_argument,
&ordered, 1},
{"role", required_argument, NULL,
3},
{"use-set-session-authorization",
no_argument, &use_setsessauth, 1},
***************
*** 493,498 ****
--- 495,506 ----
exit(1);
}
+ if (!dump_inserts && ordered)
+ {
+ write_msg(NULL, "option --ordered
cannot be used without --inserts or --column_inserts\n");
+ exit(1);
+ }
+
/* open the output file */
if (pg_strcasecmp(format, "a") == 0 ||
pg_strcasecmp(format, "append") == 0)
{
***************
*** 822,827 ****
--- 830,836 ----
printf(_(" --disable-dollar-quoting
disable dollar quoting, use SQL standard quoting\n"));
printf(_(" --disable-triggers
disable triggers during data-only restore\n"));
printf(_(" --no-tablespaces
do not dump tablespace assignments\n"));
+ printf(_(" --ordered
order insert statements by primary key or unique
index\n"));
printf(_(" --role=ROLENAME
do SET ROLE before dump\n"));
printf(_("
--use-set-session-authorization\n"
"
use SET SESSION AUTHORIZATION
commands instead of\n"
***************
*** 1210,1215 ****
--- 1219,1225 ----
TableDataInfo *tdinfo = (TableDataInfo *)
dcontext;
TableInfo *tbinfo = tdinfo->tdtable;
const char *classname = tbinfo->dobj.name;
+ PQExpBuffer p = createPQExpBuffer();
PQExpBuffer q = createPQExpBuffer();
PGresult *res;
int tuple;
***************
*** 1239,1244 ****
--- 1249,1292 ----
classname));
}
+ /*
+ * If --ordered is specified on the command
line get the primary key or
+ * unique index column positions and order the
select using those. If
+ * neither exist say so and just do an
unordered select. The ORDER BY and
+ * LIMIT 1 insure we're using the least number
of rows in the
+ * sort.
+ */
+
+ if (ordered)
+ {
+ appendPQExpBuffer(p, "SELECT
array_to_string(indkey, ','), array_length(i.indkey, 1) "
+
" FROM pg_catalog.pg_index i "
+
" WHERE (i.indisprimary = true or
i.indisunique = true) "
+
" AND i.indisvalid = true "
+
" AND i.indrelid =
'%s'::regclass "
+
" ORDER BY 2, 1 LIMIT 1",
+
fmtQualifiedId(tbinfo->dobj.namespace->dobj.name,
classname));
+
+ res = PQexec(g_conn, p->data);
+ check_sql_result(res, g_conn,
p->data, PGRES_TUPLES_OK);
+
+ if (PQntuples(res) == 0)
+ {
+ write_msg(NULL, "Table
%s does not have a primary key or unique index. The dump
for this table is unordered.\n",
+
fmtQualifiedId(tbinfo->dobj.namespace->dobj.name,
classname));
+ }
+ else
+ {
+ const char *s =
PQgetvalue(res, 0, 0);
+ if (s != NULL)
+ {
+
appendPQExpBuffer(q, " ORDER BY %s", s);
+ }
+ }
+ PQclear(res);
+ destroyPQExpBuffer(p);
+ }
+
res = PQexec(g_conn, q->data);
check_sql_result(res, g_conn, q->data,
PGRES_COMMAND_OK);
diff -cNr doc/man1/pg_dump.1.orig doc/man1/pg_dump.1
*** doc/man1/pg_dump.1.orig 2010-04-15
12:05:54.000000000 -0400
--- doc/man1/pg_dump.1 2010-04-15 12:30:55.000000000
-0400
***************
*** 356,361 ****
--- 356,368 ----
an error in reloading a row causes only that row to be
lost rather
than the entire table contents.
.TP
+ \fB--ordered\fR
+ Order the data being dumped. This option is mainly
useful
+ for running a \fBdiff(1)\fR on two plain dump files to
detect
+ data changes, and will be very slow
+ when dumping large tables. Must be used with
+ \fB--inserts\fR or \fB--column-inserts\fR.
+ .TP
\fB--disable-dollar-quoting\fR
This option disables the use of dollar quoting for
function bodies,
and forces them to be quoted using SQL standard string
syntax.
diff -cNr doc/src/sgml/ref/pg_dump.sgml.orig
doc/src/sgml/ref/pg_dump.sgml
*** doc/src/sgml/ref/pg_dump.sgml.orig 2010-04-15
12:18:49.000000000 -0400
--- doc/src/sgml/ref/pg_dump.sgml 2010-04-15
12:30:01.000000000 -0400
***************
*** 561,566 ****
--- 561,582 ----
</varlistentry>
<varlistentry>
+
<term><option>--ordered</option></term>
+ <listitem>
+ <para>
+ Order the data being dumped. This option
is mainly useful
+ for running a
<application>diff</application>
+ on two plain dump files to detect data
changes,
+ and will be very slow
+ when dumping large tables. Must be used
with
+ <option>--inserts</optionor
+
<option>--column-inserts</option>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+
+ <varlistentry>
<term><option>--disable-dollar-quoting</></term>
<listitem>
<para>