Thread: why group expressions cause query to run forever

why group expressions cause query to run forever

From
"Andrus"
Date:
How to speed the following query? It seems to run forever.

explain SELECT
bilkaib.DB,
CASE WHEN dbkonto.objekt1='+' THEN bilkaib.DBOBJEKT ELSE '' END AS dbobjekt,
CASE WHEN dbkonto.objekt2='+' THEN bilkaib.DB2OBJEKT ELSE '' END AS
db2objekt,
CASE WHEN dbkonto.objekt3='+' THEN bilkaib.DB3OBJEKT ELSE '' END AS
db3objekt,
CASE WHEN dbkonto.objekt4='+' THEN bilkaib.DB4OBJEKT ELSE '' END AS
db4objekt,
CASE WHEN dbkonto.objekt5='+' THEN bilkaib.DB5OBJEKT ELSE '' END AS
db5objekt,
CASE WHEN dbkonto.objekt6='+' THEN bilkaib.DB6OBJEKT ELSE '' END AS
db6objekt,
CASE WHEN dbkonto.objekt7='+' THEN bilkaib.DB7OBJEKT ELSE '' END AS
db7objekt,
CASE WHEN dbkonto.objekt8='+' THEN bilkaib.DB8OBJEKT ELSE '' END AS
db8objekt,
CASE WHEN dbkonto.objekt9='+' THEN bilkaib.DB9OBJEKT ELSE '' END AS
db9objekt,
bilkaib.CR,
CASE WHEN crkonto.objekt1='+' THEN bilkaib.crOBJEKT ELSE '' END AS crobjekt,
CASE WHEN crkonto.objekt2='+' THEN bilkaib.cr2OBJEKT ELSE '' END AS
cr2objekt,
CASE WHEN crkonto.objekt3='+' THEN bilkaib.cr3OBJEKT ELSE '' END AS
cr3objekt,
CASE WHEN crkonto.objekt4='+' THEN bilkaib.cr4OBJEKT ELSE '' END AS
cr4objekt,
CASE WHEN crkonto.objekt5='+' THEN bilkaib.cr5OBJEKT ELSE '' END AS
cr5objekt,
CASE WHEN crkonto.objekt6='+' THEN bilkaib.cr6OBJEKT ELSE '' END AS
cr6objekt,
CASE WHEN crkonto.objekt7='+' THEN bilkaib.cr7OBJEKT ELSE '' END AS
cr7objekt,
CASE WHEN crkonto.objekt8='+' THEN bilkaib.cr8OBJEKT ELSE '' END AS
cr8objekt,
CASE WHEN crkonto.objekt9='+' THEN bilkaib.cr9OBJEKT ELSE '' END AS
cr9objekt,
bilkaib.RAHA,
CASE WHEN crkonto.klienkaupa OR dbkonto.klienkaupa
  OR crkonto.tyyp IN ('K','I') OR dbkonto.tyyp IN ('K','I')
THEN
  bilkaib.KLIENT ELSE '' END AS klient,

bilkaib.EXCHRATE,

CASE WHEN crkonto.klienkaupa OR dbkonto.klienkaupa
  OR crkonto.tyyp IN ('K','I') OR dbkonto.tyyp IN ('K','I')
THEN
  '' ELSE '' END AS kliendinim,  -- 24.

CASE WHEN crkonto.arvekaupa OR dbkonto.arvekaupa
  OR (bilkaib.cr<>'00' AND crkonto.tyyp='K')
  OR (bilkaib.db<>'00' AND dbkonto.tyyp='K')
THEN bilkaib.doknr ELSE CAST('' AS CHAR(25) ) END AS doknr

,CASE WHEN bilkaib.raha='EEK' THEN CAST('20060101' AS DATE) ELSE
bilkaib.kuupaev END AS kuupaev
,SUM(bilkaib.summa) AS summa
,CAST( 0 as numeric(12,2)) as rhsumma
  from BILKAIB join KONTO CRKONTO ON bilkaib.cr=crkonto.kontonr AND
      crkonto.iseloom='A'
    join KONTO DBKONTO ON bilkaib.db=dbkonto.kontonr AND
      dbkonto.iseloom='A'
     where
 bilkaib.kuupaev BETWEEN '2006-01-01' AND '2006-12-31'
  GROUP BY
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26

"GroupAggregate  (cost=83038.02..103020.42 rows=124890 width=759)"
"  ->  Sort  (cost=83038.02..83350.25 rows=124890 width=759)"
"        Sort Key: bilkaib.db, CASE WHEN (dbkonto.objekt1 = '+'::bpchar)
THEN bilkaib.dbobjekt ELSE ''::bpchar END, CASE WHEN (dbkonto.objekt2 =
'+'::bpchar) THEN bilkaib.db2objekt ELSE ''::bpchar END, CASE WHEN
(dbkonto.objekt3 = '+'::bpchar) THEN bilkaib. (..)"
"        ->  Hash Join  (cost=41.71..23348.23 rows=124890 width=759)"
"              Hash Cond: ("outer".cr = "inner".kontonr)"
"              ->  Hash Join  (cost=20.86..11676.02 rows=144696 width=707)"
"                    Hash Cond: ("outer".db = "inner".kontonr)"
"                    ->  Seq Scan on bilkaib  (cost=0.00..9369.99
rows=167643 width=655)"
"                          Filter: ((kuupaev >= '2006-01-01'::date) AND
(kuupaev <= '2006-12-31'::date))"
"                    ->  Hash  (cost=20.29..20.29 rows=227 width=66)"
"                          ->  Seq Scan on konto dbkonto  (cost=0.00..20.29
rows=227 width=66)"
"                                Filter: (iseloom = 'A'::bpchar)"
"              ->  Hash  (cost=20.29..20.29 rows=227 width=66)"
"                    ->  Seq Scan on konto crkonto  (cost=0.00..20.29
rows=227 width=66)"
"                          Filter: (iseloom = 'A'::bpchar)"


If I only replace column expressions with constant numbers, it runs fast:

explain analyze SELECT 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6
,SUM(bilkaib.summa) AS summa
,CAST( 0 as numeric(12,2)) as rhsumma
  from BILKAIB join KONTO CRKONTO ON bilkaib.cr=crkonto.kontonr AND
      crkonto.iseloom='A'
    join KONTO DBKONTO ON bilkaib.db=dbkonto.kontonr AND
      dbkonto.iseloom='A'
     where
 bilkaib.kuupaev BETWEEN '2006-01-01' AND '2006-12-31'
  GROUP BY
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26


"HashAggregate  (cost=22099.33..22099.34 rows=1 width=11) (actual
time=4518.820..4518.824 rows=1 loops=1)"
"  ->  Hash Join  (cost=41.71..13669.25 rows=124890 width=11) (actual
time=4.347..3445.650 rows=167349 loops=1)"
"        Hash Cond: ("outer".cr = "inner".kontonr)"
"        ->  Hash Join  (cost=20.86..11676.02 rows=144696 width=25) (actual
time=2.165..2076.951 rows=167349 loops=1)"
"              Hash Cond: ("outer".db = "inner".kontonr)"
"              ->  Seq Scan on bilkaib  (cost=0.00..9369.99 rows=167643
width=39) (actual time=0.012..725.813 rows=167349 loops=1)"
"                    Filter: ((kuupaev >= '2006-01-01'::date) AND (kuupaev
<= '2006-12-31'::date))"
"              ->  Hash  (cost=20.29..20.29 rows=227 width=14) (actual
time=2.112..2.112 rows=227 loops=1)"
"                    ->  Seq Scan on konto dbkonto  (cost=0.00..20.29
rows=227 width=14) (actual time=0.011..1.126 rows=227 loops=1)"
"                          Filter: (iseloom = 'A'::bpchar)"
"        ->  Hash  (cost=20.29..20.29 rows=227 width=14) (actual
time=2.149..2.149 rows=227 loops=1)"
"              ->  Seq Scan on konto crkonto  (cost=0.00..20.29 rows=227
width=14) (actual time=0.022..1.152 rows=227 loops=1)"
"                    Filter: (iseloom = 'A'::bpchar)"
"Total runtime: 4519.063 ms"

Postgres 8.1 on Gentoo Linux.

Andrus.



Re: why group expressions cause query to run forever

From
Tom Lane
Date:
"Andrus" <eetasoft@online.ee> writes:
> How to speed the following query? It seems to run forever.
> explain SELECT
> bilkaib.DB,
> CASE WHEN dbkonto.objekt1='+' THEN bilkaib.DBOBJEKT ELSE '' END AS dbobjekt,
> CASE WHEN dbkonto.objekt2='+' THEN bilkaib.DB2OBJEKT ELSE '' END AS
> db2objekt,
> CASE WHEN dbkonto.objekt3='+' THEN bilkaib.DB3OBJEKT ELSE '' END AS
> db3objekt,
> ...
>   GROUP BY
> 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26

I think the problem is probably that you're sorting two dozen CHAR
columns, and that in many of the rows all these entries are '' forcing
the sort code to compare all two dozen columns (not so)?  So the sort
ends up doing lots and lots and lots of CHAR comparisons.  Which can
be slow, especially in non-C locales.  What's your locale setting?

            regards, tom lane

Re: why group expressions cause query to run forever

From
"Andrus"
Date:
Tom,

thank you.

> I think the problem is probably that you're sorting two dozen CHAR
> columns, and that in many of the rows all these entries are '' forcing
> the sort code to compare all two dozen columns (not so)?  So the sort
> ends up doing lots and lots and lots of CHAR comparisons.  Which can
> be slow, especially in non-C locales.  What's your locale setting?

show all returns

"lc_collate";"en_US.UTF-8"
"lc_ctype";"en_US.UTF-8"
"lc_messages";"C"
"lc_monetary";"et_EE.utf-8"
"lc_numeric";"et_EE.utf-8"
"lc_time";"et_EE.utf-8"

How to speed up this  query ?
Is it possible to force the binary comparison for grouping ?
Should I concatenate all the char columns into single column ?

Andrus.




Re: why group expressions cause query to run forever

From
"Andrus"
Date:
> I think the problem is probably that you're sorting two dozen CHAR
> columns, and that in many of the rows all these entries are '' forcing
> the sort code to compare all two dozen columns (not so)?

Yes, most of columns return empty strings.

I changed empty strings to null, casted to varchar and simplyfied the
statment.
However, this select statement runs forever.

Any idea how to speed it up ?

Andrus.

SELECT
bilkaib.DB,
CASE WHEN dbkonto.objekt1='+' THEN bilkaib.DBOBJEKT ELSE null
END::VARCHAR(10) AS dbobjekt,
CASE WHEN dbkonto.objekt2='+' THEN bilkaib.DB2OBJEKT ELSE null
END::VARCHAR(10) AS db2objekt,
CASE WHEN dbkonto.objekt3='+' THEN bilkaib.DB3OBJEKT ELSE null
END::VARCHAR(10) AS db3objekt,
CASE WHEN dbkonto.objekt4='+' THEN bilkaib.DB4OBJEKT ELSE null
END::VARCHAR(10) AS db4objekt,
CASE WHEN dbkonto.objekt5='+' THEN bilkaib.DB5OBJEKT ELSE null
END::VARCHAR(10) AS db5objekt,
CASE WHEN dbkonto.objekt6='+' THEN bilkaib.DB6OBJEKT ELSE null
END::VARCHAR(10) AS db6objekt,
CASE WHEN dbkonto.objekt7='+' THEN bilkaib.DB7OBJEKT ELSE null
END::VARCHAR(10) AS db7objekt,
CASE WHEN dbkonto.objekt8='+' THEN bilkaib.DB8OBJEKT ELSE null
END::VARCHAR(10) AS db8objekt,
CASE WHEN dbkonto.objekt9='+' THEN bilkaib.DB9OBJEKT ELSE null
END::VARCHAR(10) AS db9objekt
  from BILKAIB join KONTO CRKONTO ON bilkaib.cr=crkonto.kontonr
  join KONTO DBKONTO ON bilkaib.db=dbkonto.kontonr
     where
 bilkaib.kuupaev BETWEEN '2006-01-01' AND '2006-12-31'
  GROUP BY 1,2,3,4,5,6,7,8,9,10