Proposed patch: make pg_dump --data-only consider FK constraints - Mailing list pgsql-hackers

From Tom Lane
Subject Proposed patch: make pg_dump --data-only consider FK constraints
Date
Msg-id 17451.1220810800@sss.pgh.pa.us
Whole thread Raw
Responses Re: Proposed patch: make pg_dump --data-only consider FK constraints  (Heikki Linnakangas <heikki.linnakangas@enterprisedb.com>)
Re: Proposed patch: make pg_dump --data-only consider FK constraints  (David Fetter <david@fetter.org>)
List pgsql-hackers
Okay, I got tired of seeing people complain about foreign-key constraint
violations in data-only dumps.  While it's true that the problem can't
be solved in the general case (because of potentially circular
references), we could certainly make pg_dump at least *try* to order the
tables according to foreign key relationships.  It turns out not to even
require a whole lot of new code.  Accordingly I propose the attached
patch.  It will order the tables safely if it can, and otherwise
complain like this:

pg_dump: WARNING: circular foreign-key constraints among these table(s):
pg_dump:   master
pg_dump:   child
pg_dump: You may not be able to restore the dump without using --disable-triggers or temporarily dropping the
constraints.

Comments?

            regards, tom lane

Index: src/bin/pg_dump/pg_dump.c
===================================================================
RCS file: /cvsroot/pgsql/src/bin/pg_dump/pg_dump.c,v
retrieving revision 1.499
diff -c -r1.499 pg_dump.c
*** src/bin/pg_dump/pg_dump.c    30 Jul 2008 19:35:13 -0000    1.499
--- src/bin/pg_dump/pg_dump.c    7 Sep 2008 18:03:38 -0000
***************
*** 166,171 ****
--- 166,172 ----
  static void getDependencies(void);
  static void getDomainConstraints(TypeInfo *tinfo);
  static void getTableData(TableInfo *tblinfo, int numTables, bool oids);
+ static void getTableDataFKConstraints(void);
  static char *format_function_arguments(FuncInfo *finfo, char *funcargs);
  static char *format_function_arguments_old(FuncInfo *finfo, int nallargs,
                            char **allargtypes,
***************
*** 659,665 ****
--- 660,670 ----
          guessConstraintInheritance(tblinfo, numTables);

      if (!schemaOnly)
+     {
          getTableData(tblinfo, numTables, oids);
+         if (dataOnly)
+             getTableDataFKConstraints();
+     }

      if (outputBlobs && hasBlobs(g_fout))
      {
***************
*** 1392,1401 ****
--- 1397,1455 ----
              tdinfo->tdtable = &(tblinfo[i]);
              tdinfo->oids = oids;
              addObjectDependency(&tdinfo->dobj, tblinfo[i].dobj.dumpId);
+
+             tblinfo[i].dataObj = tdinfo;
          }
      }
  }

+ /*
+  * getTableDataFKConstraints -
+  *      add dump-order dependencies reflecting foreign key constraints
+  *
+  * This code is executed only in a data-only dump --- in schema+data dumps
+  * we handle foreign key issues by not creating the FK constraints until
+  * after the data is loaded.  In a data-only dump, however, we want to
+  * order the table data objects in such a way that a table's referenced
+  * tables are restored first.  (In the presence of circular references or
+  * self-references this may be impossible; we'll detect and complain about
+  * that during the dependency sorting step.)
+  */
+ static void
+ getTableDataFKConstraints(void)
+ {
+     DumpableObject **dobjs;
+     int            numObjs;
+     int            i;
+
+     /* Search through all the dumpable objects for FK constraints */
+     getDumpableObjects(&dobjs, &numObjs);
+     for (i = 0; i < numObjs; i++)
+     {
+         if (dobjs[i]->objType == DO_FK_CONSTRAINT)
+         {
+             ConstraintInfo *cinfo = (ConstraintInfo *) dobjs[i];
+             TableInfo *ftable;
+
+             /* Not interesting unless both tables are to be dumped */
+             if (cinfo->contable == NULL ||
+                 cinfo->contable->dataObj == NULL)
+                 continue;
+             ftable = findTableByOid(cinfo->confrelid);
+             if (ftable == NULL ||
+                 ftable->dataObj == NULL)
+                 continue;
+             /*
+              * Okay, make referencing table's TABLE_DATA object depend on
+              * the referenced table's TABLE_DATA object.
+              */
+             addObjectDependency(&cinfo->contable->dataObj->dobj,
+                                 ftable->dataObj->dobj.dumpId);
+         }
+     }
+     free(dobjs);
+ }
+

  /*
   * guessConstraintInheritance:
***************
*** 3626,3631 ****
--- 3680,3686 ----
                  constrinfo[j].condomain = NULL;
                  constrinfo[j].contype = contype;
                  constrinfo[j].condef = NULL;
+                 constrinfo[j].confrelid = InvalidOid;
                  constrinfo[j].conindex = indxinfo[j].dobj.dumpId;
                  constrinfo[j].conislocal = true;
                  constrinfo[j].separate = true;
***************
*** 3666,3675 ****
      ConstraintInfo *constrinfo;
      PQExpBuffer query;
      PGresult   *res;
!     int            i_condef,
!                 i_contableoid,
                  i_conoid,
!                 i_conname;
      int            ntups;

      /* pg_constraint was created in 7.3, so nothing to do if older */
--- 3721,3731 ----
      ConstraintInfo *constrinfo;
      PQExpBuffer query;
      PGresult   *res;
!     int            i_contableoid,
                  i_conoid,
!                 i_conname,
!                 i_confrelid,
!                 i_condef;
      int            ntups;

      /* pg_constraint was created in 7.3, so nothing to do if older */
***************
*** 3697,3703 ****

          resetPQExpBuffer(query);
          appendPQExpBuffer(query,
!                           "SELECT tableoid, oid, conname, "
                            "pg_catalog.pg_get_constraintdef(oid) as condef "
                            "FROM pg_catalog.pg_constraint "
                            "WHERE conrelid = '%u'::pg_catalog.oid "
--- 3753,3759 ----

          resetPQExpBuffer(query);
          appendPQExpBuffer(query,
!                           "SELECT tableoid, oid, conname, confrelid, "
                            "pg_catalog.pg_get_constraintdef(oid) as condef "
                            "FROM pg_catalog.pg_constraint "
                            "WHERE conrelid = '%u'::pg_catalog.oid "
***************
*** 3711,3716 ****
--- 3767,3773 ----
          i_contableoid = PQfnumber(res, "tableoid");
          i_conoid = PQfnumber(res, "oid");
          i_conname = PQfnumber(res, "conname");
+         i_confrelid = PQfnumber(res, "confrelid");
          i_condef = PQfnumber(res, "condef");

          constrinfo = (ConstraintInfo *) malloc(ntups * sizeof(ConstraintInfo));
***************
*** 3727,3732 ****
--- 3784,3790 ----
              constrinfo[j].condomain = NULL;
              constrinfo[j].contype = 'f';
              constrinfo[j].condef = strdup(PQgetvalue(res, j, i_condef));
+             constrinfo[j].confrelid = atooid(PQgetvalue(res, j, i_confrelid));
              constrinfo[j].conindex = 0;
              constrinfo[j].conislocal = true;
              constrinfo[j].separate = true;
***************
*** 3810,3815 ****
--- 3868,3874 ----
          constrinfo[i].condomain = tinfo;
          constrinfo[i].contype = 'c';
          constrinfo[i].condef = strdup(PQgetvalue(res, i, i_consrc));
+         constrinfo[i].confrelid = InvalidOid;
          constrinfo[i].conindex = 0;
          constrinfo[i].conislocal = true;
          constrinfo[i].separate = false;
***************
*** 4788,4793 ****
--- 4847,4853 ----
                  constrs[j].condomain = NULL;
                  constrs[j].contype = 'c';
                  constrs[j].condef = strdup(PQgetvalue(res, j, 3));
+                 constrs[j].confrelid = InvalidOid;
                  constrs[j].conindex = 0;
                  constrs[j].conislocal = (PQgetvalue(res, j, 4)[0] == 't');
                  constrs[j].separate = false;
Index: src/bin/pg_dump/pg_dump.h
===================================================================
RCS file: /cvsroot/pgsql/src/bin/pg_dump/pg_dump.h,v
retrieving revision 1.140
diff -c -r1.140 pg_dump.h
*** src/bin/pg_dump/pg_dump.h    9 May 2008 23:32:04 -0000    1.140
--- src/bin/pg_dump/pg_dump.h    7 Sep 2008 18:03:38 -0000
***************
*** 280,285 ****
--- 280,286 ----
       */
      int            numParents;        /* number of (immediate) parent tables */
      struct _tableInfo **parents;    /* TableInfos of immediate parents */
+     struct _tableDataInfo *dataObj;    /* TableDataInfo, if dumping its data */
  } TableInfo;

  typedef struct _attrDefInfo
***************
*** 352,357 ****
--- 353,359 ----
      TypeInfo   *condomain;        /* NULL if table constraint */
      char        contype;
      char       *condef;            /* definition, if CHECK or FOREIGN KEY */
+     Oid            confrelid;        /* referenced table, if FOREIGN KEY */
      DumpId        conindex;        /* identifies associated index if any */
      bool        conislocal;        /* TRUE if constraint has local definition */
      bool        separate;        /* TRUE if must dump as separate item */
Index: src/bin/pg_dump/pg_dump_sort.c
===================================================================
RCS file: /cvsroot/pgsql/src/bin/pg_dump/pg_dump_sort.c,v
retrieving revision 1.20
diff -c -r1.20 pg_dump_sort.c
*** src/bin/pg_dump/pg_dump_sort.c    1 Jan 2008 19:45:55 -0000    1.20
--- src/bin/pg_dump/pg_dump_sort.c    7 Sep 2008 18:03:38 -0000
***************
*** 947,952 ****
--- 947,975 ----
      }

      /*
+      * If all the objects are TABLE_DATA items, what we must have is a
+      * circular set of foreign key constraints (or a single self-referential
+      * table).  Give an appropriate warning and break the loop arbitrarily.
+      */
+     for (i = 0; i < nLoop; i++)
+     {
+         if (loop[i]->objType != DO_TABLE_DATA)
+             break;
+     }
+     if (i >= nLoop)
+     {
+         write_msg(NULL, "WARNING: circular foreign-key constraints among these table(s):\n");
+         for (i = 0; i < nLoop; i++)
+             write_msg(NULL, "  %s\n", loop[i]->name);
+         write_msg(NULL, "You may not be able to restore the dump without using --disable-triggers or temporarily
droppingthe constraints.\n"); 
+         if (nLoop > 1)
+             removeObjectDependency(loop[0], loop[1]->dumpId);
+         else                        /* must be a self-dependency */
+             removeObjectDependency(loop[0], loop[0]->dumpId);
+         return;
+     }
+
+     /*
       * If we can't find a principled way to break the loop, complain and break
       * it in an arbitrary fashion.
       */
***************
*** 958,964 ****
          describeDumpableObject(loop[i], buf, sizeof(buf));
          write_msg(modulename, "  %s\n", buf);
      }
!     removeObjectDependency(loop[0], loop[1]->dumpId);
  }

  /*
--- 981,991 ----
          describeDumpableObject(loop[i], buf, sizeof(buf));
          write_msg(modulename, "  %s\n", buf);
      }
!
!     if (nLoop > 1)
!         removeObjectDependency(loop[0], loop[1]->dumpId);
!     else                        /* must be a self-dependency */
!         removeObjectDependency(loop[0], loop[0]->dumpId);
  }

  /*

pgsql-hackers by date:

Previous
From: Dimitri Fontaine
Date:
Subject: Re: reducing statistics write overhead
Next
From: Heikki Linnakangas
Date:
Subject: Re: Proposed patch: make pg_dump --data-only consider FK constraints