Performance problem with large insert statements - Mailing list pgsql-hackers

From Barry Lind
Subject Performance problem with large insert statements
Date
Msg-id 3C05D361.8030009@xythos.com
Whole thread Raw
Responses Re: Performance problem with large insert statements
List pgsql-hackers
In looking at some performance issues (I was trying to look at the
overhead of toast) I found that large insert statements were very slow.

My test case involved reading a file (6M file in my tests) and inserting
  it into the database into a "largeobject" like table defined as follows:

create table tblob1 (filename text, lastbyte integer, data bytea);

The first test program read the file 8000 bytes at a time and inserted
them into the above table until the entire file was inserted.  This test
program used a regular insert statement to do the inserting: (insert
into tblob1 values (?,?,?))

For three runs of this test the average time to insert the 6M file into
the database in 8000 byte rows (which ended up being 801 rows inserted
into the table) was: 17.803 seconds

The second test read the file in 8000 byte chunks just like the first
program, but it used a function to do the insert and called the function
via the FastPath API.  The function was:

CREATE FUNCTION BYTEA_WRITE (TEXT, INTEGER, BYTEA) RETURNS INTEGER
as '
BEGIN
   INSERT INTO TBLOB1 VALUES ($1, $2, $3);
RETURN 1;
END;'
language 'plpgsql'

For three runs of this test the average time to insert the 6M file into
the database in 8000 byte parts was:  2.645


Thus using the insert statement was almost an order of magnitude slower
than using the function (17.803 sec vs. 2.645 sec).

Reading the data back from the server via a standard select statement
takes on average: 1.674 seconds.

I tried to run gprof to see where the time was going, but for some
reason the gprof output on my gmon.out file doesn't have any timing
information (all times are reported as 0.0) and I haven't been able to
figure out why yet.  So I don't know what is taking up the bulk of the
time  (I suspect it is either the decoding of the bytea data which the
Fastpath function call avoids, or the parser which needs to parse 801 8K
SQL statements vs. the function which has to parse one 100 byte statement.)

I have attached the two test programs (they are in java and use jdbc)
and a SQL script that creates the table and function.

thanks,
--Barry
import java.io.*;
import java.util.*;
import java.sql.*;


public class BlobTestInsert {

    /** Shared JDBC connection; remains null if the connection attempt failed. */
    private Connection con = null;

    /**
     * Opens a connection to the local "files" database.
     * Failures are reported on stdout and leave {@code con} null.
     */
    public BlobTestInsert() {
        try {
            Class.forName("org.postgresql.Driver");
            con = DriverManager.getConnection(
                    "jdbc:postgresql://localhost:5432/files", "blind", "");
            if (con != null) {
               System.out.println("Connected to database system.\n");
            }
        } catch (SQLException e) {
            System.out.println(e.getMessage());
            System.out.println("Could not connect to database system.\n");
        } catch (ClassNotFoundException e) {
            System.out.println("Class not found...:-(\n");
        }
    }

    /**
     * Reads the named file in 8000-byte chunks and inserts each chunk as a
     * row of tblob1 via a parameterized INSERT, timing the whole operation.
     *
     * @param filename path of the file to store; also used as the row key
     */
    private void store(String filename) {
        File file = new File(filename);
        byte[] buf = new byte[8000];
        int offset = 0;
        System.out.println("store start " + System.currentTimeMillis() + " file.name:" + file.getName() +
"file.length:" + file.length());
        // try-with-resources closes the stream and the statement even when an
        // insert fails; the original leaked both on any exception.
        try (FileInputStream fis = new FileInputStream(file)) {
            con.setAutoCommit(false);
            try (PreparedStatement ps = con.prepareStatement(
                    "INSERT INTO tblob1 VALUES (?, ?, ?);")) {
                int n;
                while ((n = fis.read(buf)) > 0) {
                    // Insert only the bytes actually read. The original ignored
                    // the read count and always stored the full 8000-byte
                    // buffer, so the last row carried stale padding whenever
                    // the file size was not a multiple of 8000.
                    byte[] chunk = (n == buf.length) ? buf : Arrays.copyOf(buf, n);
                    ps.setString(1, filename);
                    ps.setInt(2, offset);
                    ps.setBytes(3, chunk);
                    ps.executeUpdate();
                    offset += n;
                }
            }
            con.commit();
            System.out.println("store end   " + System.currentTimeMillis());
        } catch (SQLException sqle) {
            System.err.println("Store content: " + sqle.getMessage());
        } catch (IOException ioe) {
            // The original swallowed I/O errors silently.
            System.err.println("Store content: " + ioe.getMessage());
        }
    }

    /**
     * Reads the chunks stored for {@code filename} back in lastbyte order and
     * writes them to a file named {@code filename + "test"}, timing the run.
     *
     * @param filename key the chunks were stored under
     */
    private void retrieve(String filename) {
        int byteSum = 0;
        try {
            System.out.println("read start  " + System.currentTimeMillis() + " Trying to write: " + filename + "test");
            con.setAutoCommit(false);
            // Parameterized query: the original concatenated the filename into
            // the SQL text, which breaks on quotes and invites SQL injection.
            try (FileOutputStream out = new FileOutputStream(filename + "test");
                 PreparedStatement ps = con.prepareStatement(
                     "SELECT data FROM tblob1 WHERE filename = ? order by lastbyte")) {
                ps.setString(1, filename);
                try (ResultSet r = ps.executeQuery()) {
                    while (r.next()) {
                        byte[] buffer = r.getBytes(1);
                        out.write(buffer);
                        byteSum += buffer.length;
                    }
                }
            }
            System.out.println("read end    " + System.currentTimeMillis() + " bytes written: " + byteSum);
            con.commit();
        } catch (SQLException sqle) {
            System.err.println("Retrieve content: " + sqle.getMessage());
        } catch (Exception ioe) {
            System.err.println("Writing stuff: " + ioe.getMessage());
        }
    }

    public static void main(String[] args) {
        BlobTestInsert bt = new BlobTestInsert();
        bt.store(args[0]);
        bt.retrieve(args[0]);
    }

}
import java.io.*;
import java.util.*;
import java.sql.*;
import org.postgresql.fastpath.Fastpath;
import org.postgresql.fastpath.FastpathArg;


public class BlobTestFunction {

    private Connection con = null;

    public BlobTestFunction() {
        try {
            Class.forName("org.postgresql.Driver");
            con = DriverManager.getConnection(
                    "jdbc:postgresql://localhost:5432/files", "blind", "");
            if (con != null) {
               System.out.println("Connected to database system.\n");
            }
        } catch (SQLException e) {
            System.out.println(e.getMessage());
            System.out.println("Could not connect to database system.\n");
        } catch (ClassNotFoundException e) {
            System.out.println("Class not found...:-(\n");
        }
    }

    private void store(String filename) {
        File file = new File(filename);
        byte[] l_buf = new byte[8000];
        int l_size = 0;
        System.out.println("store start " + System.currentTimeMillis() + " file.name:" + file.getName() + "
file.length:"+file.length());
        try {
            FileInputStream fis = new FileInputStream(file);
            con.setAutoCommit(false);
            Fastpath l_fp = ((org.postgresql.Connection)con).getFastpathAPI();
           // Now get the function oid's for the api
           Statement l_stmt = null;
           ResultSet l_rset = null;
           try {
             try {
               l_stmt = con.createStatement();
               l_rset = l_stmt.executeQuery("SELECT PRONAME, OID FROM PG_PROC " +
                                            "WHERE PRONAME = 'bytea_write'");
               if(l_rset == null) {
                 System.err.println("Missing funciton bytea_write");
               }
               l_fp.addFunctions(l_rset);
             } finally {
               if (l_rset != null) {
                 l_rset.close();
               }
               if (l_stmt != null) {
                 l_stmt.close();
               }
             }
           } catch (SQLException l_se) {
             System.err.println("Store content: " + l_se.getMessage());
           }

           FastpathArg args[] = new FastpathArg[3];
           while ( l_size < file.length()) {
         fis.read(l_buf);
             args[0] = new FastpathArg(filename);
             args[1] = new FastpathArg(l_size);
             args[2] = new FastpathArg(l_buf, 0, 8000);
             l_size += 8000;
             l_fp.fastpath("bytea_write", true, args);
           }
           fis.close();
           con.commit();
           System.out.println("store end   " + System.currentTimeMillis());
        } catch (SQLException sqle) {
            System.err.println("Store content: " + sqle.getMessage());
        } catch (IOException ioe) {
            System.err.println("Store content: " + ioe.getMessage());
        }
    }

    private void retrieve(String filename) {
        Statement s = null;
        ResultSet r = null;
        int byteSum = 0;
        byte[] buffer;
        try {
            System.out.println("read start  " + System.currentTimeMillis() + " Trying to write: " +filename + ".test");
            FileOutputStream out = new FileOutputStream(filename + ".test");
            s = con.createStatement();
            r = s.executeQuery("SELECT data FROM tblob1 WHERE filename = '"
                               + filename +"' order by lastbyte");
            if (r != null) {
                while (r.next()) {
                    buffer = r.getBytes(1);
                    out.write(buffer);
                    byteSum += buffer.length;
                }
            }
            out.close();
            System.out.println("read end    " + System.currentTimeMillis() + " bytes written: " + byteSum);
            con.commit();
        } catch (SQLException sqle) {
            System.err.println("Retrieve content: " + sqle.getMessage());
        } catch (Exception ioe) {
            System.err.println("Writing stuff: " + ioe.getMessage());
        }
    }

    public static void main(String[] args) {
        BlobTestFunction bt = new BlobTestFunction();
        bt.store(args[0]);
        bt.retrieve(args[0]);
    }

}
-- Rebuild the benchmark table and its server-side insert helper.
DROP TABLE TBLOB1;

CREATE TABLE TBLOB1 (FILENAME TEXT, LASTBYTE INTEGER, DATA BYTEA);


DROP FUNCTION BYTEA_WRITE(TEXT, INTEGER, BYTEA);

-- Insert helper invoked via the Fastpath API so the client avoids building
-- and parsing one large SQL statement per 8K chunk.
CREATE FUNCTION BYTEA_WRITE (TEXT, INTEGER, BYTEA) RETURNS INTEGER
as '
BEGIN
  INSERT INTO TBLOB1 VALUES ($1, $2, $3);
RETURN 1;
END;'
language 'plpgsql';  -- terminating semicolon was missing in the original

pgsql-hackers by date:

Previous
From: Tom Lane
Date:
Subject: Re: 7.2b3 pg_dump, general 7.2b3 comments
Next
From: Joe Conway
Date:
Subject: Re: 7.2b3 pg_dump, general 7.2b3 comments