I managed to completely mangle decodeUTF8() in my changes that went into
build 303. It would incorrectly decode any multibyte UTF-8
representation. Don't know what I was thinking there...
Anyway, here is a patch to repair the damage, and a test case to check
that the driver reads/writes Unicode strings correctly.
Thanks to Dario Fassi for helping diagnose this.
-O
Index: org/postgresql/core/Encoding.java
===================================================================
RCS file: /usr/local/cvsroot/pgjdbc/pgjdbc/org/postgresql/core/Encoding.java,v
retrieving revision 1.15
diff -c -r1.15 Encoding.java
*** org/postgresql/core/Encoding.java 29 Jun 2004 06:43:25 -0000 1.15
--- org/postgresql/core/Encoding.java 17 Jul 2004 03:43:48 -0000
***************
*** 286,296 ****
// Length 1: \u00000 .. \u0007f
} else if (ch < 0xe0) {
// Length 2: \u00080 .. \u007ff
! ch = ch | ((data[in++] & 0x7f) << 6);
} else {
// Length 3: \u00800 .. \u0ffff
! ch = ch | ((data[in++] & 0x7f) << 12);
! ch = ch | ((data[in++] & 0x7f) << 6);
}
cdata[out++] = (char)ch;
}
--- 286,298 ----
// Length 1: \u00000 .. \u0007f
} else if (ch < 0xe0) {
// Length 2: \u00080 .. \u007ff
! ch = ((ch & 0x1f) << 6);
! ch = ch | (data[in++] & 0x3f);
} else {
// Length 3: \u00800 .. \u0ffff
! ch = ((ch & 0x0f) << 12);
! ch = ch | ((data[in++] & 0x3f) << 6);
! ch = ch | (data[in++] & 0x3f);
}
cdata[out++] = (char)ch;
}
Index: org/postgresql/test/jdbc2/Jdbc2TestSuite.java
===================================================================
RCS file: /usr/local/cvsroot/pgjdbc/pgjdbc/org/postgresql/test/jdbc2/Jdbc2TestSuite.java,v
retrieving revision 1.15
diff -c -r1.15 Jdbc2TestSuite.java
*** org/postgresql/test/jdbc2/Jdbc2TestSuite.java 15 Jul 2004 10:10:30 -0000 1.15
--- org/postgresql/test/jdbc2/Jdbc2TestSuite.java 17 Jul 2004 03:43:48 -0000
***************
*** 59,64 ****
--- 59,65 ----
suite.addTestSuite(JBuilderTest.class);
suite.addTestSuite(MiscTest.class);
suite.addTestSuite(NotifyTest.class);
+ suite.addTestSuite(DatabaseEncodingTest.class);
// Fastpath/LargeObject
suite.addTestSuite(BlobTest.class);
*** /dev/null Tue Jan 27 23:20:00 2004
--- org/postgresql/test/jdbc2/DatabaseEncodingTest.java Sat Jul 17 15:38:54 2004
***************
*** 0 ****
--- 1,100 ----
+ package org.postgresql.test.jdbc2;
+
+ import org.postgresql.test.TestUtil;
+ import junit.framework.TestCase;
+ import java.sql.*;
+
+ /*
+ * Test case for Dario's encoding problems.
+ */
+ public class DatabaseEncodingTest extends TestCase
+ {
+ private Connection con;
+
+ public DatabaseEncodingTest(String name)
+ {
+ super(name);
+ }
+
+ private static final int STEP = 30;
+
+ // Set up the fixture for this testcase: a connection to a database with
+ // a table for this test.
+ protected void setUp() throws Exception
+ {
+ con = TestUtil.openDB();
+ TestUtil.createTable(con,
+ "testdbencoding",
+ "unicode_ordinal integer primary key not null, unicode_string varchar(" + STEP + ")");
+ }
+
+ // Tear down the fixture for this test case.
+ protected void tearDown() throws Exception
+ {
+ TestUtil.dropTable(con, "testdbencoding");
+ TestUtil.closeDB(con);
+ }
+
+ private static String dumpString(String s) {
+ StringBuffer sb = new StringBuffer(s.length() * 6);
+ for (int i = 0; i < s.length(); ++i) {
+ sb.append("\\u");
+ char c = s.charAt(i);
+ sb.append(Integer.toHexString((c>>12)&15));
+ sb.append(Integer.toHexString((c>>8)&15));
+ sb.append(Integer.toHexString((c>>4)&15));
+ sb.append(Integer.toHexString(c&15));
+ }
+ return sb.toString();
+ }
+
+ public void testEncoding() throws Exception {
+ // Check that we have a UNICODE server encoding, or we must skip this test.
+ Statement stmt = con.createStatement();
+ ResultSet rs = stmt.executeQuery("SELECT getdatabaseencoding()");
+ assertTrue(rs.next());
+ if (!"UNICODE".equals(rs.getString(1))) {
+ rs.close();
+ return; // not a UNICODE database.
+ }
+
+ rs.close();
+
+ con.setAutoCommit(false); // Go faster!
+
+ // Create data.
+ // NB: we only test up to d800 as code points above that are
+ // reserved for surrogates in UTF-16
+ PreparedStatement insert = con.prepareStatement("INSERT INTO testdbencoding(unicode_ordinal, unicode_string) VALUES(?,?)");
+ for (int i = 1; i < 0xd800; i += STEP) {
+ int count = (i+STEP) > 0xd800 ? 0xd800-i : STEP;
+ char[] testChars = new char[count];
+ for (int j = 0; j < count; ++j)
+ testChars[j] = (char)(i+j);
+
+ String testString = new String(testChars);
+
+ insert.setInt(1, i);
+ insert.setString(2, testString);
+ assertEquals(1, insert.executeUpdate());
+ }
+
+ con.commit();
+
+ // Check data.
+ rs = stmt.executeQuery("SELECT unicode_ordinal, unicode_string FROM testdbencoding ORDER BY unicode_ordinal");
+ for (int i = 1; i < 0xd800; i += STEP) {
+ assertTrue(rs.next());
+ assertEquals(i, rs.getInt(1));
+
+ int count = (i+STEP) > 0xd800 ? 0xd800-i : STEP;
+ char[] testChars = new char[count];
+ for (int j = 0; j < count; ++j)
+ testChars[j] = (char)(i+j);
+
+ String testString = new String(testChars);
+
+ assertEquals(dumpString(testString), dumpString(rs.getString(2)));
+ }
+ }
+ }