Thread: patch: fix hopelessly broken decodeUTF8() method

patch: fix hopelessly broken decodeUTF8() method

From

Oliver Jowett

Date:

17 July 2004, 00:57:02

I managed to completely mangle decodeUTF8() in my changes that went into
build 303. It would incorrectly decode any multibyte UTF8
representation. Don't know what I was thinking there..

Anyway, here is a patch to repair the damage, and a testcase to check
that the driver reads/writes unicode strings correctly.

Thanks to Dario Fassi for helping diagnose this.

-O
Index: org/postgresql/core/Encoding.java
===================================================================
RCS file: /usr/local/cvsroot/pgjdbc/pgjdbc/org/postgresql/core/Encoding.java,v
retrieving revision 1.15
diff -c -r1.15 Encoding.java
*** org/postgresql/core/Encoding.java    29 Jun 2004 06:43:25 -0000    1.15
--- org/postgresql/core/Encoding.java    17 Jul 2004 03:43:48 -0000
***************
*** 286,296 ****
                      // Length 1: \u00000 .. \u0007f
                  } else if (ch < 0xe0) {
                      // Length 2: \u00080 .. \u007ff
!                     ch = ch | ((data[in++] & 0x7f) << 6);
                  } else {
                      // Length 3: \u00800 .. \u0ffff
!                     ch = ch | ((data[in++] & 0x7f) << 12);
!                     ch = ch | ((data[in++] & 0x7f) << 6);
                  }
                  cdata[out++] = (char)ch;
              }
--- 286,298 ----
                      // Length 1: \u00000 .. \u0007f
                  } else if (ch < 0xe0) {
                      // Length 2: \u00080 .. \u007ff
!                     ch = ((ch & 0x1f) << 6);
!                     ch = ch | (data[in++] & 0x3f);
                  } else {
                      // Length 3: \u00800 .. \u0ffff
!                     ch = ((ch & 0x0f) << 12);
!                     ch = ch | ((data[in++] & 0x3f) << 6);
!                     ch = ch | (data[in++] & 0x3f);
                  }
                  cdata[out++] = (char)ch;
              }
Index: org/postgresql/test/jdbc2/Jdbc2TestSuite.java
===================================================================
RCS file: /usr/local/cvsroot/pgjdbc/pgjdbc/org/postgresql/test/jdbc2/Jdbc2TestSuite.java,v
retrieving revision 1.15
diff -c -r1.15 Jdbc2TestSuite.java
*** org/postgresql/test/jdbc2/Jdbc2TestSuite.java    15 Jul 2004 10:10:30 -0000    1.15
--- org/postgresql/test/jdbc2/Jdbc2TestSuite.java    17 Jul 2004 03:43:48 -0000
***************
*** 59,64 ****
--- 59,65 ----
          suite.addTestSuite(JBuilderTest.class);
          suite.addTestSuite(MiscTest.class);
          suite.addTestSuite(NotifyTest.class);
+         suite.addTestSuite(DatabaseEncodingTest.class);

          // Fastpath/LargeObject
          suite.addTestSuite(BlobTest.class);
*** /dev/null    Tue Jan 27 23:20:00 2004
--- org/postgresql/test/jdbc2/DatabaseEncodingTest.java    Sat Jul 17 15:38:54 2004
***************
*** 0 ****
--- 1,100 ----
+ package org.postgresql.test.jdbc2;
+
+ import org.postgresql.test.TestUtil;
+ import junit.framework.TestCase;
+ import java.sql.*;
+
+ /*
+  * Test case for Dario's encoding problems.
+  */
+ public class DatabaseEncodingTest extends TestCase
+ {
+     private Connection con;
+
+     public DatabaseEncodingTest(String name)
+     {
+         super(name);
+     }
+
+     private static final int STEP = 30;
+
+     // Set up the fixture for this testcase: a connection to a database with
+     // a table for this test.
+     protected void setUp() throws Exception
+     {
+         con = TestUtil.openDB();
+         TestUtil.createTable(con,
+                              "testdbencoding",
+                              "unicode_ordinal integer primary key not null, unicode_string varchar(" + STEP + ")");
+     }
+
+     // Tear down the fixture for this test case.
+     protected void tearDown() throws Exception
+     {
+         TestUtil.dropTable(con, "testdbencoding");
+         TestUtil.closeDB(con);
+     }
+
+     private static String dumpString(String s) {
+         StringBuffer sb = new StringBuffer(s.length() * 6);
+         for (int i = 0; i < s.length(); ++i) {
+             sb.append("\\u");
+             char c = s.charAt(i);
+             sb.append(Integer.toHexString((c>>12)&15));
+             sb.append(Integer.toHexString((c>>8)&15));
+             sb.append(Integer.toHexString((c>>4)&15));
+             sb.append(Integer.toHexString(c&15));
+         }
+         return sb.toString();
+     }
+
+     public void testEncoding() throws Exception {
+         // Check that we have a UNICODE server encoding, or we must skip this test.
+         Statement stmt = con.createStatement();
+         ResultSet rs = stmt.executeQuery("SELECT getdatabaseencoding()");
+         assertTrue(rs.next());
+         if (!"UNICODE".equals(rs.getString(1))) {
+             rs.close();
+             return; // not a UNICODE database.
+         }
+
+         rs.close();
+
+         con.setAutoCommit(false); // Go faster!
+
+         // Create data.
+         // NB: we only test up to d800 as code points above that are
+         // reserved for surrogates in UTF-16
+         PreparedStatement insert = con.prepareStatement("INSERT INTO testdbencoding(unicode_ordinal, unicode_string) VALUES(?,?)");
+         for (int i = 1; i < 0xd800; i += STEP) {
+             int count = (i+STEP) > 0xd800 ? 0xd800-i : STEP;
+             char[] testChars = new char[count];
+             for (int j = 0; j < count; ++j)
+                 testChars[j] = (char)(i+j);
+
+             String testString = new String(testChars);
+
+             insert.setInt(1, i);
+             insert.setString(2, testString);
+             assertEquals(1, insert.executeUpdate());
+         }
+
+         con.commit();
+
+         // Check data.
+         rs = stmt.executeQuery("SELECT unicode_ordinal, unicode_string FROM testdbencoding ORDER BY unicode_ordinal");
+         for (int i = 1; i < 0xd800; i += STEP) {
+             assertTrue(rs.next());
+             assertEquals(i, rs.getInt(1));
+
+             int count = (i+STEP) > 0xd800 ? 0xd800-i : STEP;
+             char[] testChars = new char[count];
+             for (int j = 0; j < count; ++j)
+                 testChars[j] = (char)(i+j);
+
+             String testString = new String(testChars);
+
+             assertEquals(dumpString(testString), dumpString(rs.getString(2)));
+         }
+     }
+ }

Re: patch: fix hopelessly broken decodeUTF8() method

From

Kris Jurka

Date:

17 July 2004, 04:44:44


On Sat, 17 Jul 2004, Oliver Jowett wrote:

> I managed to completely mangle decodeUTF8() in my changes that went into
> build 303. It would incorrectly decode any multibyte UTF8
> representation. Don't know what I was thinking there..
>

Applied and new dev version released.

Kris Jurka