Thread: Optimizing query

Optimizing query

From
pasman pasmański
Date:
Hello.

I have a query which works a bit slow.

It's runned on desktop computer: AMD Athlon X2 2GHz , Win Xp sp2, 1GB ram.
Postgres 8.4.5 with some changes in config:

shared_buffers = 200MB                  # min 128kB
                               # (change requires restart)
temp_buffers = 8MB                      # min 800kB
work_mem = 12MB                         # min 64kB
maintenance_work_mem = 32MB             # min 1MB

Indexes in table "NumeryA":
"NTA", "NKA", "KodBłędu", "Plik"        primary key
"DataPliku", "KodBłędu" index dp_kb
"NKA", "NTA"                        index nka_nta

Indexes in table "Rejestr stacji do naprawy":
"LP"    - primary key
"Numer kierunkowy", substr("Numer stacji"::text, 1, 5)  - index "3"
"Data weryfikacji"      - index "Data weryfikacji_1"
"Numer kierunkowy", "Numer stacji", "Data odrzucania bilingu z
Serat"  - index "Powtórzenia"

---------------------
Query is:
----------------------
SELECT
 A."NKA",
 A."NTA",
 Min("PołączeniaMin") || ',' || Max("PołączeniaMax") AS "Biling",
 Sum("Ile")::text AS "Ilość CDR",
 R."LP"::text AS "Sprawa",
 (R."Osoba weryfikująca") AS "Osoba",
 to_char(min("Wartość"),'FM9999990D00') AS "Wartość po kontroli",
 max(R."Kontrola po naprawie w Serat - CDR")::text AS "CDR po kontroli",
 min(A."KodBłędu")::text AS KodBłędu,
 Max(to_char(R."Data kontroli",'YYYY-MM-DD')) AS "Ostatnia Kontrola"
, max("Skutek wprowadzenia błednej ewidencji w Serat") as "Skutek"
, sum(www.a_biling_070("NRB"))::text
, sum(www.a_biling_darmowy("NRB"))::text
FROM
 (SELECT "NumeryA".*
  FROM ONLY "NumeryA"
  WHERE "DataPliku" >= current_date-4*30 and "KodBłędu"=74::text
 ) AS A
LEFT JOIN
 (SELECT * FROM "Rejestr stacji do naprawy"
  WHERE "Data weryfikacji" >= current_date-4*30
 ) AS R
ON
 A."NKA" = R."Numer kierunkowy"
 and substr(A."NTA",1,5) = substr(R."Numer stacji",1,5)
 and A."NTA" like R."Numer stacji"
GROUP BY R."Osoba weryfikująca",R."LP",A."NKA", A."NTA"
ORDER BY Sum("Ile") DESC
LIMIT 5000
----------------------
Explain analyze:
----------------------

"Limit  (cost=30999.84..31012.34 rows=5000 width=149) (actual
time=7448.483..7480.094 rows=5000 loops=1)"
"  ->  Sort  (cost=30999.84..31073.19 rows=29341 width=149) (actual
time=7448.475..7459.663 rows=5000 loops=1)"
"        Sort Key: (sum("NumeryA"."Ile"))"
"        Sort Method:  top-N heapsort  Memory: 1488kB"
"        ->  GroupAggregate  (cost=11093.77..29050.46 rows=29341
width=149) (actual time=4700.654..7377.762 rows=14225 loops=1)"
"              ->  Sort  (cost=11093.77..11167.12 rows=29341
width=149) (actual time=4699.587..4812.776 rows=46732 loops=1)"
"                    Sort Key: "Rejestr stacji do naprawy"."Osoba
weryfikująca", "Rejestr stacji do naprawy"."LP", "NumeryA"."NKA",
"NumeryA"."NTA""
"                    Sort Method:  quicksort  Memory: 9856kB"
"                    ->  Merge Left Join  (cost=8297.99..8916.58
rows=29341 width=149) (actual time=2931.449..3735.876 rows=46732
loops=1)"
"                          Merge Cond: ((("NumeryA"."NKA")::text =
("Rejestr stacji do naprawy"."Numer kierunkowy")::text) AND
((substr(("NumeryA"."NTA")::text, 1, 5)) = (substr(("Rejestr stacji do
naprawy"."Numer stacji")::text, 1, 5))))"
"                          Join Filter: (("NumeryA"."NTA")::text ~~
("Rejestr stacji do naprawy"."Numer stacji")::text)"
"                          ->  Sort  (cost=6062.18..6135.53 rows=29341
width=95) (actual time=2131.297..2241.303 rows=46694 loops=1)"
"                                Sort Key: "NumeryA"."NKA",
(substr(("NumeryA"."NTA")::text, 1, 5))"
"                                Sort Method:  quicksort  Memory: 7327kB"
"                                ->  Bitmap Heap Scan on "NumeryA"
(cost=1502.09..3884.98 rows=29341 width=95) (actual
time=282.570..1215.355 rows=46694 loops=1)"
"                                      Recheck Cond: (("DataPliku" >=
(('now'::text)::date - 120)) AND (("KodBłędu")::text = '74'::text))"
"                                      ->  Bitmap Index Scan on dp_kb
(cost=0.00..1494.75 rows=29341 width=0) (actual time=281.991..281.991
rows=46694 loops=1)"
"                                            Index Cond: (("DataPliku"
>= (('now'::text)::date - 120)) AND (("KodBłędu")::text =
'74'::text))"
"                          ->  Sort  (cost=2235.82..2285.03 rows=19684
width=64) (actual time=800.101..922.463 rows=54902 loops=1)"
"                                Sort Key: "Rejestr stacji do
naprawy"."Numer kierunkowy", (substr(("Rejestr stacji do
naprawy"."Numer stacji")::text, 1, 5))"
"                                Sort Method:  quicksort  Memory: 3105kB"
"                                ->  Seq Scan on "Rejestr stacji do
naprawy"  (cost=0.00..831.88 rows=19684 width=64) (actual
time=2.118..361.463 rows=19529 loops=1)"
"                                      Filter: ("Data weryfikacji" >=
(('now'::text)::date - 120))"
"Total runtime: 7495.697 ms"
---------------------------------

How to make it faster ?



------------
pasman

Re: Optimizing query

From
"Pierre C"
Date:

Note that your LEFT JOIN condition is probably quite slow...

Please post EXPLAIN ANALYZE for this simplified version :

SELECT
    R."Osoba weryfikuj?ca",
    R."LP",
    A."NKA",
    A."NTA",
    Sum("Ile")
FROM        "NumeryA" A
LEFT JOIN    "Rejestr stacji do naprawy" R ON (
        A."NKA" = R."Numer kierunkowy"
    and A."NTA" like R."Numer stacji"
    and substr(A."NTA",1,5) = substr(R."Numer stacji",1,5)
)
WHERE
        A."DataPliku" >= current_date-4*30
    and A."KodB??du"=74::text
    and R."Data weryfikacji" >= current_date-4*30
GROUP BY R."Osoba weryfikuj?ca",R."LP",A."NKA", A."NTA"
ORDER BY Sum("Ile") DESC
LIMIT 5000

And also post EXPLAIN ANALYZE for this :

SELECT
    A."NKA",
    A."NTA",
    Sum("Ile") AS ss -- if it's in this table
FROM        "NumeryA" A
WHERE
        A."DataPliku" >= current_date-4*30
    and A."KodB??du"=74::text
GROUP BY A."NKA", A."NTA"

Re: Optimizing query

From
pasman pasmański
Date:
Thanks for reply.


First query:

SELECT
 R."Osoba weryfikująca" AS "Osoba",
 R."LP"::text AS "Sprawa",
 A."NKA",
 A."NTA",
 Sum(A."Ile")::text AS "Ilość CDR"
FROM
  ONLY "NumeryA" A
LEFT JOIN
 "Rejestr stacji do naprawy" R
ON
 A."NKA" = R."Numer kierunkowy"
 and A."NTA" like R."Numer stacji"
 and substr(A."NTA",1,5) = substr(R."Numer stacji",1,5)

WHERE
  "DataPliku" >= current_date-4*30
  and "KodBłędu"=74::text
  and "Data weryfikacji" >= current_date-4*30

GROUP BY R."Osoba weryfikująca",R."LP",A."NKA", A."NTA"
ORDER BY Sum("Ile") DESC
LIMIT 5000

-----------------------
Explain analyze
-----------------------

"Limit  (cost=8806.28..8806.30 rows=5 width=28) (actual
time=2575.143..2607.092 rows=5000 loops=1)"
"  ->  Sort  (cost=8806.28..8806.30 rows=5 width=28) (actual
time=2575.135..2586.797 rows=5000 loops=1)"
"        Sort Key: (sum(a."Ile"))"
"        Sort Method:  quicksort  Memory: 929kB"
"        ->  HashAggregate  (cost=8806.12..8806.23 rows=5 width=28)
(actual time=2500.549..2544.315 rows=9564 loops=1)"
"              ->  Merge Join  (cost=8196.81..8806.04 rows=5 width=28)
(actual time=1583.222..2368.858 rows=37364 loops=1)"
"                    Merge Cond: (((a."NKA")::text = (r."Numer
kierunkowy")::text) AND ((substr((a."NTA")::text, 1, 5)) =
(substr((r."Numer stacji")::text, 1, 5))))"
"                    Join Filter: ((a."NTA")::text ~~ (r."Numer stacji")::text)"
"                    ->  Sort  (cost=5883.01..5952.95 rows=27977
width=15) (actual time=1006.220..1118.692 rows=46769 loops=1)"
"                          Sort Key: a."NKA", (substr((a."NTA")::text, 1, 5))"
"                          Sort Method:  quicksort  Memory: 4313kB"
"                          ->  Bitmap Heap Scan on "NumeryA" a
(cost=1454.33..3816.64 rows=27977 width=15) (actual
time=16.331..158.007 rows=46769 loops=1)"
"                                Recheck Cond: (("DataPliku" >=
(('now'::text)::date - 120)) AND (("KodBłędu")::text = '74'::text))"
"                                ->  Bitmap Index Scan on dp_kb
(cost=0.00..1447.34 rows=27977 width=0) (actual time=15.838..15.838
rows=46769 loops=1)"
"                                      Index Cond: (("DataPliku" >=
(('now'::text)::date - 120)) AND (("KodBłędu")::text = '74'::text))"
"                    ->  Sort  (cost=2313.79..2364.81 rows=20410
width=24) (actual time=576.966..703.179 rows=56866 loops=1)"
"                          Sort Key: r."Numer kierunkowy",
(substr((r."Numer stacji")::text, 1, 5))"
"                          Sort Method:  quicksort  Memory: 1973kB"
"                          ->  Seq Scan on "Rejestr stacji do naprawy"
r  (cost=0.00..852.74 rows=20410 width=24) (actual time=0.050..143.901
rows=20768 loops=1)"
"                                Filter: ("Data weryfikacji" >=
(('now'::text)::date - 120))"
"Total runtime: 2620.220 ms"

---------------------------
Second query:
----------------------------
SELECT
    A."NKA",
    A."NTA",
    Sum("Ile") AS ss -- if it's in this table
FROM        "NumeryA" A
WHERE
        A."DataPliku" >= current_date-4*30
    and A."KodBłędu"=74::text
GROUP BY A."NKA", A."NTA"

--------------------------------
Explain analyze:
--------------------------------

"HashAggregate  (cost=20616.64..20643.22 rows=2798 width=15) (actual
time=13244.712..13284.490 rows=14288 loops=1)"
"  ->  Append  (cost=1454.33..20406.79 rows=27979 width=15) (actual
time=16.811..13093.395 rows=46769 loops=1)"
"        ->  Bitmap Heap Scan on "NumeryA" a  (cost=1454.33..3816.64
rows=27977 width=15) (actual time=16.804..141.495 rows=46769 loops=1)"
"              Recheck Cond: (("DataPliku" >= (('now'::text)::date -
120)) AND (("KodBłędu")::text = '74'::text))"
"              ->  Bitmap Index Scan on dp_kb  (cost=0.00..1447.34
rows=27977 width=0) (actual time=16.289..16.289 rows=46769 loops=1)"
"                    Index Cond: (("DataPliku" >= (('now'::text)::date
- 120)) AND (("KodBłędu")::text = '74'::text))"
"        ->  Seq Scan on "NumeryA_2008" a  (cost=0.00..16590.16 rows=2
width=15) (actual time=12759.731..12759.731 rows=0 loops=1)"
"              Filter: ((("KodBłędu")::text = '74'::text) AND
("DataPliku" >= (('now'::text)::date - 120)))"
"Total runtime: 13314.149 ms"


The first query looks to work faster than original (6s) thanks !!! :)



------------
pasman