I've been testing how fast I am able to insert entries in a simple
table, and I am shocked how slow postgres gets when the table gets
large.
My test program takes one day of syslog messages (~400k) and inserts
them into a sql table. The table is created if missing. My benchmark
program does two 'select count(*) from syslog_test;' with 10 seconds
apart and calculates how many inserts was done in that period. It
then waits 4 minutes and repeats.
The insert peaks at 43/s with around 20k entries in the tables and
then quickly moves down to 15/s (100k) and 6/s (200k). Here are the
test script, the benchmark script and some numbers.
7697 -> 8072 11s 34 entry/s
17227 -> 17701 11s 43 entry/s
50427 -> 50824 14s 28 entry/s
98831 -> 99033 14s 14 entry/s
151528 -> 151645 12s 9 entry/s
200084 -> 200165 12s 6 entry/s
251277 -> 251341 12s 5 entry/s
BTW: I know this is a crude benchmark, but I really wants to insert
syslog data in less then a day. On the last machine I tested this (a
little slower then the current one), I was able to insert one days
syslog in 24 hours and 52 minues. A little to slow. :-)
========================================================================
#!/store/bin/perl5 -w
#
# Author: Petter Reinholdtsen <pere@td.org.uit.no>
# Date: 1998-11-17
#
# Import syslog messages into SQL. Create table syslog if missing.
#
# Usage:
# import-syslog.pl [-l level] [-f facility]
use DBI;
use strict;
use Getopt::Std;
use vars qw($dbh $rc $level $facility $filename $opt_f $opt_l);
my %global = (table => "syslog_test",
dbhost => 'localhost',
dbname => 'pere');
my @levels = qw(emerg alert crit err warning notice info debug);
my @facilities =
qw(kern user mail daemon auth syslog lpr news uucp cron local.*);
getopts("l:f:");
$dbh = DBI->connect("dbi:Pg:dbname=$global{dbname} host=$global{dbhost}");
init_table() unless ( table_exists() );
my ($lastfilename, $local_level, $local_facility);
while (<>) {
$filename = $ARGV;
if ($filename ne $lastfilename) {
print "New file $filename\n";
$lastfilename = $filename;
undef $opt_l if ($local_level);
undef $opt_f if ($local_facility);
undef $local_level;
undef $local_facility;
# Try to extract level or facility from filename
for $level (@levels) {
my @f = split(/\//, $filename);
my $lev;
if (($lev) = grep /^$level$/, @f) {
$opt_l = $lev;
print "Level=$opt_l\n";
$local_level=1;
}
}
for $facility (@facilities) {
my @f = split(/\//, $filename);
my $fac;
if (($fac) = grep /^$facility$/, @f) {
$opt_f = $fac;
print "Facility=$opt_f\n";
$local_facility=1;
}
}
}
chomp;
next if (/last message repeated/);
my $hashref = parse_old_format($_);
$hashref->{facility} = $opt_f if ($opt_f);
$hashref->{level} = $opt_l if ($opt_l);
sql_syslog_insert($hashref);# if ($hashref);
}
#$rv = $dbh->do("SELECT * from test");
$rc = $dbh->disconnect;
print "Error disconnectiong\n" unless ($rc);
sub syslog_to_sql_date {
my $date = shift;
my ($year) = (localtime)[5]+1900;
$date .=" $year";
return $date;
}
sub parse_old_format ($) {
my $line = shift;
my ($date, $host, $prog, $pid, $info);
($date, $host) =
$line =~ m/^(\w+ \d+ \d+:\d+:\d+) ([\.\w-]+) /;
if ($date) { # Remove date and host info
$line =~ s/^\w+ \d+ \d+:\d+:\d+ [\.\w-]+ //;
}
($prog) =
$line =~ m/^(\S+)[\d\[\]]*: .+$/;
if ($prog) {
($pid) =
$prog =~ m/\[(\d+)\]/;
$prog =~ s/\[\d+\]//;
}
($info) =
$line =~ m/: (.+)$/;
$date = syslog_to_sql_date($date);
if ( ! $host ) {
print "E: $line\n\n";
return;
}
return {when => $date,
host => $host,
prog => $prog,
pid => $pid,
info => $info
};
}
sub sql_syslog_insert {
my $hashref = shift;
my $sql = "INSERT INTO $global{table} (";
my $values = "";
my ($key, @keys, @values);
for $key (keys %$hashref) {
if ( $hashref->{$key} ) {
push(@keys, $key);
push(@values, sql_escape($hashref->{$key}));
}
}
$sql .= join(",", @keys);
$sql .= ") VALUES ('";
$sql .= join("','", @values);
$sql .= "')";
my $rv = $dbh->do($sql);
print "ES: $sql\n\n" if (!$rv);
}
sub sql_escape {
my($str) = shift;
return undef if ( !defined $str);
$str =~ s/\\/\\\\/;
$str =~ s/\'/\'\'/g;
return $str;
}
sub table_exists {
if ( $dbh->do("SELECT * FROM $global{table}") ) {
return 1; # TRUE
} else {
return ""; # FALSE
}
}
sub init_table {
# -priority (emerg/alert/crit/err/warning/notice/info/debug)
# -facility (kern/user/mail/daemon/auth/syslog/lpr/news/uucp/cron/local*)
# dato
# hostname
# prog
# pid
# info
my $sql =
"CREATE TABLE $global{table} (".
"when datetime NOT NULL,".
"host varchar(40) NOT NULL,".
"level varchar(20),".
"facility varchar(20),".
"prog varchar(20),".
"pid int,".
"info varchar(1024) NOT NULL".
")";
print "Init table:\n";
$dbh->do($sql);
}
========================================================================
#!/bin/sh
# Author: Petter Reinholdtsen <pere@td.org.uit.no>
#
# Benchmark insert rate
while true ; do
date
starttime=`date +%s`
startcount=`psql -c 'select count(*) from syslog_test;'|grep '^[0-9 ]'|grep -v count`
sleep 10
endtime=`date +%s`
endcount=`psql -c 'select count(*) from syslog_test;'|grep '^[0-9 ]'|grep -v count`
time=`echo $endtime - $starttime | bc `
rate=`echo "($endcount - $startcount)/$time"|bc`
echo "$startcount -> $endcount $time s $rate entry/s"
sleep 240
done
========================================================================
--
##> Petter Reinholdtsen <## | pere@td.org.uit.no
O- <SCRIPT Language="Javascript">window.close()</SCRIPT>
http://www.hungry.com/~pere/ | Go Mozilla, go! Go!