Thread: Slow query

Slow query

From
Ross Elliott
Date:
Maybe someone can explain this. The following SQL will reproduce our issue:
DROP TABLE IF EXISTS t1 CASCADE;
CREATE TABLE t1 (name text,
                 state text);
CREATE INDEX t1_name ON t1(name);
CREATE INDEX t1_state ON t1(state);
CREATE INDEX t1_name_state ON t1(name,state);

-- Create some sample data
DO $$
DECLARE
states text[] := array['UNKNOWN', 'TODO', 'DONE', 'UNKNOWN'];
BEGIN
FOR v IN 1..200000 LOOP
  INSERT INTO t1 VALUES('user'||v, states[floor(random()*4)]);
  INSERT INTO t1 VALUES('user'||v, states[floor(random()*4)]);
  INSERT INTO t1 VALUES('user'||v, states[floor(random()*4)]);
  INSERT INTO t1 VALUES('user'||v, states[floor(random()*4)]);
END LOOP;
END $$;


CREATE OR REPLACE FUNCTION state_to_int(state character varying) RETURNS integer
    LANGUAGE plpgsql IMMUTABLE STRICT
    AS $$BEGIN
IF state = 'UNKNOWN' THEN RETURN 0;
ELSIF state = 'TODO' THEN RETURN 1;
ELSIF state = 'DONE' THEN RETURN 2;
ELSIF state = 'NOT REQUIRED' THEN RETURN 3;
ELSE RAISE EXCEPTION 'state_to_int called with invalid state value';
END IF;
END;$$;

CREATE OR REPLACE FUNCTION int_to_state(state integer) RETURNS character varying
    LANGUAGE plpgsql IMMUTABLE STRICT
    AS $$BEGIN
IF state = 0 THEN RETURN 'UNKNOWN';
ELSIF state = 1 THEN RETURN 'TODO';
ELSIF state = 2 THEN RETURN 'DONE';
ELSIF state = 3 THEN RETURN 'NOT REQUIRED';
ELSE RAISE EXCEPTION 'int_to_state called with invalid state value';
END IF;
END;$$;

-- Why is this a lot slower
explain (analyse, buffers) select name, int_to_state(min(state_to_int(state))) as status from t1 group by t1.name;

-- Than this?
explain (analyze, buffers) select name, (array['UNKNOWN', 'TODO', 'DONE', 'NOT REQUIRED'])[min(
CASE state
WHEN 'UNKNOWN' THEN 0
WHEN 'TODO' THEN 1
WHEN 'DONE' THEN 2
WHEN 'NOT REQUIRED' THEN 3
END)] AS status from t1 group by t1.name;

-- This is also very much slower
explain (analyze, buffers) select name, (array['UNKNOWN', 'TODO', 'DONE', 'NOT REQUIRED'])[min(state_to_int(state))] AS status from t1 group by t1.name;

This was done on:
PostgreSQL 9.3.5 on x86_64-unknown-linux-gnu, compiled by gcc (Ubuntu 4.8.2-19ubuntu1) 4.8.2, 64-bit

We get results like this:
                                                            QUERY PLAN                                                             
-----------------------------------------------------------------------------------------------------------------------------------
 GroupAggregate  (cost=0.42..280042.62 rows=208120 width=15) (actual time=0.076..2439.066 rows=200000 loops=1)
   Buffers: shared hit=53146
   ->  Index Scan using t1_name on t1  (cost=0.42..21931.42 rows=800000 width=15) (actual time=0.009..229.477 rows=800000 loops=1)
         Buffers: shared hit=53146
 Total runtime: 2460.860 ms
(5 rows)

                                                            QUERY PLAN                                                             
-----------------------------------------------------------------------------------------------------------------------------------
 GroupAggregate  (cost=0.42..36012.62 rows=208120 width=15) (actual time=0.017..559.384 rows=200000 loops=1)
   Buffers: shared hit=53146
   ->  Index Scan using t1_name on t1  (cost=0.42..21931.42 rows=800000 width=15) (actual time=0.008..197.133 rows=800000 loops=1)
         Buffers: shared hit=53146
 Total runtime: 574.550 ms
(5 rows)

                                                            QUERY PLAN                                                             
-----------------------------------------------------------------------------------------------------------------------------------
 GroupAggregate  (cost=0.42..228012.62 rows=208120 width=15) (actual time=0.042..2089.367 rows=200000 loops=1)
   Buffers: shared hit=53146
   ->  Index Scan using t1_name on t1  (cost=0.42..21931.42 rows=800000 width=15) (actual time=0.008..237.854 rows=800000 loops=1)
         Buffers: shared hit=53146
 Total runtime: 2111.004 ms
(5 rows)


We cannot change our table structure to reflect something more sensible. What we would really like to know is why using functions is so much slower than the unreadable method.

Regards

Ross


Re: Slow query

From
David G Johnston
Date:
Ross Elliott-2 wrote
> Maybe someone can explain this. The following SQL will reproduce our
> issue:
> DROP TABLE IF EXISTS t1 CASCADE;
> CREATE TABLE t1 (name text,
>                  state text);
> CREATE INDEX t1_name ON t1(name);
> CREATE INDEX t1_state ON t1(state);
> CREATE INDEX t1_name_state ON t1(name,state);
>
> -- Create some sample data
> DO $$
> DECLARE
> states text[] := array['UNKNOWN', 'TODO', 'DONE', 'UNKNOWN'];
> BEGIN
> FOR v IN 1..200000 LOOP
>   INSERT INTO t1 VALUES('user'||v, states[floor(random()*4)]);
>   INSERT INTO t1 VALUES('user'||v, states[floor(random()*4)]);
>   INSERT INTO t1 VALUES('user'||v, states[floor(random()*4)]);
>   INSERT INTO t1 VALUES('user'||v, states[floor(random()*4)]);
> END LOOP;
> END $$;
>
>
> CREATE OR REPLACE FUNCTION state_to_int(state character varying) RETURNS
> integer
>     LANGUAGE plpgsql IMMUTABLE STRICT
>     AS $$BEGIN
> IF state = 'UNKNOWN' THEN RETURN 0;
> ELSIF state = 'TODO' THEN RETURN 1;
> ELSIF state = 'DONE' THEN RETURN 2;
> ELSIF state = 'NOT REQUIRED' THEN RETURN 3;
> ELSE RAISE EXCEPTION 'state_to_int called with invalid state value';
> END IF;
> END;$$;
>
> CREATE OR REPLACE FUNCTION int_to_state(state integer) RETURNS character
> varying
>     LANGUAGE plpgsql IMMUTABLE STRICT
>     AS $$BEGIN
> IF state = 0 THEN RETURN 'UNKNOWN';
> ELSIF state = 1 THEN RETURN 'TODO';
> ELSIF state = 2 THEN RETURN 'DONE';
> ELSIF state = 3 THEN RETURN 'NOT REQUIRED';
> ELSE RAISE EXCEPTION 'int_to_state called with invalid state value';
> END IF;
> END;$$;
>
> -- Why is this a lot slower
> explain (analyse, buffers) select name,
> int_to_state(min(state_to_int(state))) as status from t1 group by t1.name;
>
> -- Than this?
> explain (analyze, buffers) select name, (array['UNKNOWN', 'TODO', 'DONE',
> 'NOT REQUIRED'])[min(
> CASE state
> WHEN 'UNKNOWN' THEN 0
> WHEN 'TODO' THEN 1
> WHEN 'DONE' THEN 2
> WHEN 'NOT REQUIRED' THEN 3
> END)] AS status from t1 group by t1.name;
>
> -- This is also very much slower
> explain (analyze, buffers) select name, (array['UNKNOWN', 'TODO', 'DONE',
> 'NOT REQUIRED'])[min(state_to_int(state))] AS status from t1 group by
> t1.name;
>
> This was done on:
> PostgreSQL 9.3.5 on x86_64-unknown-linux-gnu, compiled by gcc (Ubuntu
> 4.8.2-19ubuntu1) 4.8.2, 64-bit
>
> We get results like this:
>                                                             QUERY PLAN
>
>
-----------------------------------------------------------------------------------------------------------------------------------
>  GroupAggregate  (cost=0.42..280042.62 rows=208120 width=15) (actual
> time=0.076..2439.066 rows=200000 loops=1)
>    Buffers: shared hit=53146
>    ->  Index Scan using t1_name on t1  (cost=0.42..21931.42 rows=800000
> width=15) (actual time=0.009..229.477 rows=800000 loops=1)
>          Buffers: shared hit=53146
>  Total runtime: 2460.860 ms
> (5 rows)
>
>                                                             QUERY PLAN
>
>
-----------------------------------------------------------------------------------------------------------------------------------
>  GroupAggregate  (cost=0.42..36012.62 rows=208120 width=15) (actual
> time=0.017..559.384 rows=200000 loops=1)
>    Buffers: shared hit=53146
>    ->  Index Scan using t1_name on t1  (cost=0.42..21931.42 rows=800000
> width=15) (actual time=0.008..197.133 rows=800000 loops=1)
>          Buffers: shared hit=53146
>  Total runtime: 574.550 ms
> (5 rows)
>
>                                                             QUERY PLAN
>
>
-----------------------------------------------------------------------------------------------------------------------------------
>  GroupAggregate  (cost=0.42..228012.62 rows=208120 width=15) (actual
> time=0.042..2089.367 rows=200000 loops=1)
>    Buffers: shared hit=53146
>    ->  Index Scan using t1_name on t1  (cost=0.42..21931.42 rows=800000
> width=15) (actual time=0.008..237.854 rows=800000 loops=1)
>          Buffers: shared hit=53146
>  Total runtime: 2111.004 ms
> (5 rows)
>
>
> We cannot change our table structure to reflect something more sensible.
> What we would really like to know is why using functions is so much slower
> than the unreadable method.
>
> Regards
>
> Ross

Pl/pgsql functions are black boxes and expensive to execute; you should
define these functions as SQL functions and see if that helps.

David J.






--
View this message in context: http://postgresql.1045698.n5.nabble.com/Slow-query-tp5820086p5820096.html
Sent from the PostgreSQL - performance mailing list archive at Nabble.com.