patch: fix hopelessly broken decodeUTF8() method - Mailing list pgsql-jdbc
From | Oliver Jowett |
---|---|
Subject | patch: fix hopelessly broken decodeUTF8() method |
Date | |
Msg-id | 40F8A343.9030000@opencloud.com |
Responses | Re: patch: fix hopelessly broken decodeUTF8() method |
List | pgsql-jdbc |
I managed to completely mangle decodeUTF8() in my changes that went into build 303. It would incorrectly decode any multibyte UTF8 representation. Don't know what I was thinking there.. Anyway, here is a patch to repair the damage, and a testcase to check that the driver reads/writes unicode strings correctly.

Thanks to Dario Fassi for helping diagnose this.

-O

Index: org/postgresql/core/Encoding.java
===================================================================
RCS file: /usr/local/cvsroot/pgjdbc/pgjdbc/org/postgresql/core/Encoding.java,v
retrieving revision 1.15
diff -c -r1.15 Encoding.java
*** org/postgresql/core/Encoding.java   29 Jun 2004 06:43:25 -0000      1.15
--- org/postgresql/core/Encoding.java   17 Jul 2004 03:43:48 -0000
***************
*** 286,296 ****
                  // Length 1: \u00000 .. \u0007f
              } else if (ch < 0xe0) {
                  // Length 2: \u00080 .. \u007ff
!                 ch = ch | ((data[in++] & 0x7f) << 6);
              } else {
                  // Length 3: \u00800 .. \u0ffff
!                 ch = ch | ((data[in++] & 0x7f) << 12);
!                 ch = ch | ((data[in++] & 0x7f) << 6);
              }
              cdata[out++] = (char)ch;
          }
--- 286,298 ----
                  // Length 1: \u00000 .. \u0007f
              } else if (ch < 0xe0) {
                  // Length 2: \u00080 .. \u007ff
!                 ch = ((ch & 0x1f) << 6);
!                 ch = ch | (data[in++] & 0x3f);
              } else {
                  // Length 3: \u00800 .. \u0ffff
!                 ch = ((ch & 0x0f) << 12);
!                 ch = ch | ((data[in++] & 0x3f) << 6);
!                 ch = ch | (data[in++] & 0x3f);
              }
              cdata[out++] = (char)ch;
          }
Index: org/postgresql/test/jdbc2/Jdbc2TestSuite.java
===================================================================
RCS file: /usr/local/cvsroot/pgjdbc/pgjdbc/org/postgresql/test/jdbc2/Jdbc2TestSuite.java,v
retrieving revision 1.15
diff -c -r1.15 Jdbc2TestSuite.java
*** org/postgresql/test/jdbc2/Jdbc2TestSuite.java       15 Jul 2004 10:10:30 -0000      1.15
--- org/postgresql/test/jdbc2/Jdbc2TestSuite.java       17 Jul 2004 03:43:48 -0000
***************
*** 59,64 ****
--- 59,65 ----
      suite.addTestSuite(JBuilderTest.class);
      suite.addTestSuite(MiscTest.class);
      suite.addTestSuite(NotifyTest.class);
+     suite.addTestSuite(DatabaseEncodingTest.class);

      // Fastpath/LargeObject
      suite.addTestSuite(BlobTest.class);
*** /dev/null   Tue Jan 27 23:20:00 2004
--- org/postgresql/test/jdbc2/DatabaseEncodingTest.java Sat Jul 17 15:38:54 2004
***************
*** 0 ****
--- 1,100 ----
+ package org.postgresql.test.jdbc2;
+
+ import org.postgresql.test.TestUtil;
+ import junit.framework.TestCase;
+ import java.sql.*;
+
+ /*
+  * Test case for Dario's encoding problems.
+  */
+ public class DatabaseEncodingTest extends TestCase
+ {
+     private Connection con;
+
+     public DatabaseEncodingTest(String name)
+     {
+         super(name);
+     }
+
+     private static final int STEP = 30;
+
+     // Set up the fixture for this testcase: a connection to a database with
+     // a table for this test.
+     protected void setUp() throws Exception
+     {
+         con = TestUtil.openDB();
+         TestUtil.createTable(con,
+                              "testdbencoding",
+                              "unicode_ordinal integer primary key not null, unicode_string varchar(" + STEP + ")");
+     }
+
+     // Tear down the fixture for this test case.
+     protected void tearDown() throws Exception
+     {
+         TestUtil.dropTable(con, "testdbencoding");
+         TestUtil.closeDB(con);
+     }
+
+     private static String dumpString(String s) {
+         StringBuffer sb = new StringBuffer(s.length() * 6);
+         for (int i = 0; i < s.length(); ++i) {
+             sb.append("\\u");
+             char c = s.charAt(i);
+             sb.append(Integer.toHexString((c>>12)&15));
+             sb.append(Integer.toHexString((c>>8)&15));
+             sb.append(Integer.toHexString((c>>4)&15));
+             sb.append(Integer.toHexString(c&15));
+         }
+         return sb.toString();
+     }
+
+     public void testEncoding() throws Exception {
+         // Check that we have a UNICODE server encoding, or we must skip this test.
+         Statement stmt = con.createStatement();
+         ResultSet rs = stmt.executeQuery("SELECT getdatabaseencoding()");
+         assertTrue(rs.next());
+         if (!"UNICODE".equals(rs.getString(1))) {
+             rs.close();
+             return; // not a UNICODE database.
+         }
+
+         rs.close();
+
+         con.setAutoCommit(false); // Go faster!
+
+         // Create data.
+         // NB: we only test up to d800 as code points above that are
+         // reserved for surrogates in UTF-16
+         PreparedStatement insert = con.prepareStatement("INSERT INTO testdbencoding(unicode_ordinal, unicode_string) VALUES(?,?)");
+         for (int i = 1; i < 0xd800; i += STEP) {
+             int count = (i+STEP) > 0xd800 ? 0xd800-i : STEP;
+             char[] testChars = new char[count];
+             for (int j = 0; j < count; ++j)
+                 testChars[j] = (char)(i+j);
+
+             String testString = new String(testChars);
+
+             insert.setInt(1, i);
+             insert.setString(2, testString);
+             assertEquals(1, insert.executeUpdate());
+         }
+
+         con.commit();
+
+         // Check data.
+         rs = stmt.executeQuery("SELECT unicode_ordinal, unicode_string FROM testdbencoding ORDER BY unicode_ordinal");
+         for (int i = 1; i < 0xd800; i += STEP) {
+             assertTrue(rs.next());
+             assertEquals(i, rs.getInt(1));
+
+             int count = (i+STEP) > 0xd800 ? 0xd800-i : STEP;
+             char[] testChars = new char[count];
+             for (int j = 0; j < count; ++j)
+                 testChars[j] = (char)(i+j);
+
+             String testString = new String(testChars);
+
+             assertEquals(dumpString(testString), dumpString(rs.getString(2)));
+         }
+     }
+ }
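For readers following along: the essence of the fix is the bit arithmetic. The old code ORed the continuation byte(s), masked with 0x7f instead of 0x3f, onto a lead byte that was never masked or shifted, so every 2- and 3-byte sequence decoded to the wrong character. The patched version keeps 5 bits of the lead byte (2-byte form) or 4 bits (3-byte form), shifts them into place, and ORs in 6 bits from each continuation byte. The following is a minimal, self-contained sketch of that arithmetic only; the class name, sample string, and main() are illustrative and not part of the driver, whose real decodeUTF8() also handles buffer offsets and lengths:

```java
// Illustrative sketch (not the driver's Encoding class): decodes 1-, 2- and
// 3-byte UTF-8 sequences using the same masks and shifts as the patched method.
public class Utf8DecodeSketch {
    static String decode(byte[] data) {
        char[] cdata = new char[data.length];
        int in = 0, out = 0;
        while (in < data.length) {
            int ch = data[in++] & 0xff;
            if (ch < 0x80) {
                // Length 1: U+0000 .. U+007F -- the byte is the code point.
            } else if (ch < 0xe0) {
                // Length 2: U+0080 .. U+07FF -- 5 bits from the lead byte,
                // 6 bits from the continuation byte.
                ch = ((ch & 0x1f) << 6) | (data[in++] & 0x3f);
            } else {
                // Length 3: U+0800 .. U+FFFF -- 4 + 6 + 6 bits.
                ch = ((ch & 0x0f) << 12)
                     | ((data[in++] & 0x3f) << 6)
                     | (data[in++] & 0x3f);
            }
            cdata[out++] = (char) ch;
        }
        return new String(cdata, 0, out);
    }

    public static void main(String[] args) throws Exception {
        // "café €" exercises ASCII, a 2-byte and a 3-byte sequence.
        String original = "caf\u00e9 \u20ac";
        byte[] utf8 = original.getBytes("UTF-8");
        // Should print true: the hand-rolled decode matches the JDK's decoder.
        System.out.println(decode(utf8).equals(new String(utf8, "UTF-8")));
    }
}
```

Running the sketch should print true; with the pre-patch 0x7f/unshifted-lead-byte arithmetic the same comparison fails, which is exactly the corruption the new DatabaseEncodingTest catches when run against a UNICODE-encoded database.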