7.4 COPY BINARY Format Change - Mailing list pgsql-hackers

From Lee Kindness
Subject 7.4 COPY BINARY Format Change
Date
Msg-id 16170.33637.772495.67561@kelvin.csl.co.uk
Whole thread Raw
Responses Re: 7.4 COPY BINARY Format Change  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-hackers
Guys,

I've been testing 7.4 against an application today. Recompiled
everything against the new libraries. However the application includes
code which builds up a bulkload file in the documented 7.1 format. The
documentation for COPY goes on at length about the format being
forward compatible...

However of course it's changed in 7.4 for the following minor reasons:

1. Header is now PGCOPY\n\377\r\n\0 rather than PGBCOPY\n\377\r\n\0

2. Integers within the header (but not the data itself) are now in
network order, rather than native order

3. The "integer layout field" has disappeared.

4. typlen is now an int32 rather than an int16 plus an additional
int32 if a varlen type.

I've attached a patch which lets COPY read in the 7.1 format. However
i'm not convinced this is the right way to go - I think the format
which is output by 7.4 should be identical to the 7.1 format. The
"integer layout field" could be used to determine if byteswapping is
required on reading. The other changes seem to be unnecessary? If the
typlen change is kept it should be flagged in the flags field rather
than requiring a completely new format - this would allow old readers
to gracefully fail and old & new files to be read in by 7.4...

It's extremely counter-productive to break backward compatibility for
such whimsical changes! This will hurt those updating to 7.4 once it
is released...

So, I expect the patch below to be rejected - I'll happily rework the
patch to revert 7.4 to a version of the 7.1 format which results in
the same feature gain but without forfeiting backward
compatibility. Let me know.

Thanks. Lee.

Index: src/backend/commands/copy.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/commands/copy.c,v
retrieving revision 1.205
diff -c -b -r1.205 copy.c
*** src/backend/commands/copy.c    1 Aug 2003 00:15:19 -0000    1.205
--- src/backend/commands/copy.c    1 Aug 2003 14:53:35 -0000
***************
*** 91,102 ****
  static void CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
                       char *delim, char *null_print);
  static char *CopyReadAttribute(const char *delim, CopyReadResult *result);
! static Datum CopyReadBinaryAttribute(int column_no, FmgrInfo *flinfo,
!                                      Oid typelem, bool *isnull);
  static void CopyAttributeOut(char *string, char *delim);
  static List *CopyGetAttnums(Relation rel, List *attnamelist);

! static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";

  /*
   * Static communication variables ... pretty grotty, but COPY has
--- 91,107 ----
  static void CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
                       char *delim, char *null_print);
  static char *CopyReadAttribute(const char *delim, CopyReadResult *result);
! static Datum CopyReadBinaryAttribute(int version, int column_no,
!                      FmgrInfo *flinfo, Oid typelem,
!                      bool *isnull);
  static void CopyAttributeOut(char *string, char *delim);
  static List *CopyGetAttnums(Relation rel, List *attnamelist);

! static const char BinarySignature74[11] = "PGCOPY\n\377\r\n\0";
! static const char BinarySignature71[12] = "PGBCOPY\n\377\r\n\0";
! #define BINARY_FMT_74x 740
! #define BINARY_FMT_71x 710
! #define BINARY_FMT_CUR BINARY_FMT_74x

  /*
   * Static communication variables ... pretty grotty, but COPY has
***************
*** 140,148 ****
  static int    CopyPeekChar(void);
  static void CopyDonePeek(int c, bool pickup);
  static void CopySendInt32(int32 val);
! static int32 CopyGetInt32(void);
  static void CopySendInt16(int16 val);
! static int16 CopyGetInt16(void);

  /*
   * Send copy start/stop messages for frontend copies.  These have changed
--- 145,153 ----
  static int    CopyPeekChar(void);
  static void CopyDonePeek(int c, bool pickup);
  static void CopySendInt32(int32 val);
! static int32 CopyGetInt32(int version);
  static void CopySendInt16(int16 val);
! static int16 CopyGetInt16(int version);

  /*
   * Send copy start/stop messages for frontend copies.  These have changed
***************
*** 571,581 ****
   * CopyGetInt32 reads an int32 that appears in network byte order
   */
  static int32
! CopyGetInt32(void)
  {
      uint32        buf;

      CopyGetData(&buf, sizeof(buf));
      return (int32) ntohl(buf);
  }

--- 576,589 ----
   * CopyGetInt32 reads an int32 that appears in network byte order
   */
  static int32
! CopyGetInt32(int version)
  {
      uint32        buf;

      CopyGetData(&buf, sizeof(buf));
+     if( version == BINARY_FMT_71x )
+       return buf;
+     else
        return (int32) ntohl(buf);
  }

***************
*** 595,605 ****
   * CopyGetInt16 reads an int16 that appears in network byte order
   */
  static int16
! CopyGetInt16(void)
  {
      uint16        buf;

      CopyGetData(&buf, sizeof(buf));
      return (int16) ntohs(buf);
  }

--- 603,617 ----
   * CopyGetInt16 reads an int16 that appears in network byte order
   */
  static int16
! CopyGetInt16(int version)
  {
      uint16        buf;

      CopyGetData(&buf, sizeof(buf));
+
+     if( version == BINARY_FMT_71x )
+       return buf;
+     else
        return (int16) ntohs(buf);
  }

***************
*** 972,978 ****
          int32        tmp;

          /* Signature */
!         CopySendData((char *) BinarySignature, 11);
          /* Flags field */
          tmp = 0;
          if (oids)
--- 984,990 ----
          int32        tmp;

          /* Signature */
!         CopySendData((char *) BinarySignature74, 11);
          /* Flags field */
          tmp = 0;
          if (oids)
***************
*** 1136,1141 ****
--- 1148,1154 ----
      ExprContext *econtext;        /* used for ExecEvalExpr for default atts */
      MemoryContext oldcontext = CurrentMemoryContext;
      ErrorContextCallback errcontext;
+     int             version = BINARY_FMT_CUR;

      tupDesc = RelationGetDescr(rel);
      attr = tupDesc->attrs;
***************
*** 1261,1272 ****

          /* Signature */
          CopyGetData(readSig, 11);
!         if (CopyGetEof() || memcmp(readSig, BinarySignature, 11) != 0)
              ereport(ERROR,
                      (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                       errmsg("COPY file signature not recognized")));
          /* Flags field */
!         tmp = CopyGetInt32();
          if (CopyGetEof())
              ereport(ERROR,
                      (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
--- 1274,1311 ----

          /* Signature */
          CopyGetData(readSig, 11);
!         if( CopyGetEof() )
!           ereport(ERROR,
!               (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
!                errmsg("invalid COPY file header (missing signature)")));
!         if( memcmp(readSig, BinarySignature74, 11) == 0 )
!           version = BINARY_FMT_74x; /* PostgreSQL 7.4 format */
!         else if( memcmp(readSig, BinarySignature71, 11) == 0 )
!           {
!             version = BINARY_FMT_71x; /* PostgreSQL 7.1 format */
!             ereport(WARNING,
!                 (errmsg("Old version 7.1 binary COPY file")));
!             CopyGetData(readSig, 1); /* the 12th byte of the header */
!             if( CopyGetEof() )
!               ereport(ERROR,
!                   (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
!                    errmsg("invalid COPY file header (signature too short)")));
!           }
!         else
            ereport(ERROR,
                (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                 errmsg("COPY file signature not recognized")));
+         /* integer layout field */
+         if( version == BINARY_FMT_71x )
+           {
+             tmp = CopyGetInt32(version);
+             if( CopyGetEof() )
+               ereport(ERROR,
+                   (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+                    errmsg("invalid COPY file header (missing integer layout field)")));
+           }
          /* Flags field */
!         tmp = CopyGetInt32(version);
          if (CopyGetEof())
              ereport(ERROR,
                      (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
***************
*** 1278,1284 ****
                      (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                       errmsg("unrecognized critical flags in COPY file header")));
          /* Header extension length */
!         tmp = CopyGetInt32();
          if (CopyGetEof() || tmp < 0)
              ereport(ERROR,
                      (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
--- 1317,1323 ----
                      (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                       errmsg("unrecognized critical flags in COPY file header")));
          /* Header extension length */
!         tmp = CopyGetInt32(version);
          if (CopyGetEof() || tmp < 0)
              ereport(ERROR,
                      (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
***************
*** 1458,1464 ****
              /* binary */
              int16        fld_count;

!             fld_count = CopyGetInt16();
              if (CopyGetEof() || fld_count == -1)
              {
                  done = true;
--- 1497,1503 ----
              /* binary */
              int16        fld_count;

!             fld_count = CopyGetInt16(version);
              if (CopyGetEof() || fld_count == -1)
              {
                  done = true;
***************
*** 1474,1480 ****
              if (file_has_oids)
              {
                  loaded_oid =
!                     DatumGetObjectId(CopyReadBinaryAttribute(0,
                                                               &oid_in_function,
                                                               oid_in_element,
                                                               &isnull));
--- 1513,1519 ----
              if (file_has_oids)
              {
                  loaded_oid =
!                     DatumGetObjectId(CopyReadBinaryAttribute(version, 0,
                                                               &oid_in_function,
                                                               oid_in_element,
                                                               &isnull));
***************
*** 1491,1497 ****
                  int            m = attnum - 1;

                  i++;
!                 values[m] = CopyReadBinaryAttribute(i,
                                                      &in_functions[m],
                                                      elements[m],
                                                      &isnull);
--- 1530,1536 ----
                  int            m = attnum - 1;

                  i++;
!                 values[m] = CopyReadBinaryAttribute(version, i,
                                      &in_functions[m],
                                      elements[m],
                                      &isnull);
***************
*** 1879,1895 ****
   * Read a binary attribute
   */
  static Datum
! CopyReadBinaryAttribute(int column_no, FmgrInfo *flinfo, Oid typelem,
!                         bool *isnull)
  {
      int32        fld_size;
      Datum        result;

!     fld_size = CopyGetInt32();
      if (CopyGetEof())
          ereport(ERROR,
                  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                   errmsg("unexpected EOF in COPY data")));
      if (fld_size == -1)
      {
          *isnull = true;
--- 1918,1957 ----
   * Read a binary attribute
   */
  static Datum
! CopyReadBinaryAttribute(int version, int column_no, FmgrInfo *flinfo,
!             Oid typelem, bool *isnull)
  {
      int32        fld_size;
      Datum        result;

!     if( version == BINARY_FMT_71x )
!       {
!         int16 tmp_fld_size;
!         tmp_fld_size = CopyGetInt16(version);
!         if( CopyGetEof() )
!           ereport(ERROR,
!               (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
!                errmsg("unexpected EOF in COPY data")));
!         if( tmp_fld_size == -1 )
!           {
!         fld_size = CopyGetInt32(version);
!         if( CopyGetEof() )
!           ereport(ERROR,
!               (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
!                errmsg("unexpected EOF in COPY data")));
!         fld_size -= 4; /* includes sizeof these 4 bytes too */
!           }
!         else
!           fld_size = tmp_fld_size;
!       }
!     else
!       {
!         fld_size = CopyGetInt32(version);
          if (CopyGetEof())
            ereport(ERROR,
                (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                 errmsg("unexpected EOF in COPY data")));
+       }
      if (fld_size == -1)
      {
          *isnull = true;

pgsql-hackers by date:

Previous
From: Bruce Momjian
Date:
Subject: Re: Fwd: Re: [ANNOUNCE] PostgreSQL Weekly News - July 30th
Next
From: "scott.marlowe"
Date:
Subject: Re: ACCESSING POST GRESQL DATABASE THRU MFCOBOL