Re: Converting row elements into a arrays? - Mailing list pgsql-general
From | Pavel Stehule |
---|---|
Subject | Re: Converting row elements into a arrays? |
Date | |
Msg-id | CAFj8pRB1UpqgSQe4Daa4CCZP6Kj+rVRNGrVeG5+5o4GtgKcBKQ@mail.gmail.com Whole thread Raw |
In response to | Re: Converting row elements into a arrays? (Merlin Moncure <mmoncure@gmail.com>) |
List | pgsql-general |
pá 3. 3. 2023 v 21:51 odesílatel Merlin Moncure <mmoncure@gmail.com> napsal:
On Thu, Mar 2, 2023 at 3:47 PM Ron <ronljohnsonjr@gmail.com> wroteI'm used to the bog standard COUNT, AVG, MIN, MAX, SUM. It didn't occur to me that there would be others...wait until you find out you can write your own:CREATE OR REPLACE FUNCTION agg_leftagg(l TEXT, r anyelement) returns text as
$$
BEGIN
RETURN CASE
WHEN l IS NOT NULL THEN format('%s-%s', l, r)
ELSE r::TEXT
END;
END;
$$ LANGUAGE PLPGSQL;
CREATE AGGREGATE leftagg(anyelement) (SFUNC=agg_leftagg, STYPE=TEXT);
CREATE TEMP TABLE s AS SELECT generate_series(1,10) s;SELECT leftagg(s) FROM s GROUP BY random() > .5;postgres=# SELECT leftagg(s) FROM s GROUP BY random() > .5;
leftagg
────────────
2-3-5-6-10
1-4-7-8-9
(2 rows)
This can work, but can be slower for large data
fast (although not too effect :-)) way
(2023-03-04 06:22:56) postgres=# CREATE TEMP TABLE s AS SELECT generate_series(1,10) s;
SELECT 10
(2023-03-04 06:22:57) postgres=# SELECT array_agg(s) FROM s GROUP BY random() > .5;
┌──────────────┐
│ array_agg │
╞══════════════╡
│ {3,6,8,9,10} │
│ {1,2,4,5,7} │
└──────────────┘
(2 rows)
(2023-03-04 06:23:21) postgres=# SELECT array_to_string(array_agg(s), '-') FROM s GROUP BY random() > .5;
┌──────────────────┐
│ array_to_string │
╞══════════════════╡
│ 1-2 │
│ 3-4-5-6-7-8-9-10 │
└──────────────────┘
(2 rows)
SELECT 10
(2023-03-04 06:22:57) postgres=# SELECT array_agg(s) FROM s GROUP BY random() > .5;
┌──────────────┐
│ array_agg │
╞══════════════╡
│ {3,6,8,9,10} │
│ {1,2,4,5,7} │
└──────────────┘
(2 rows)
(2023-03-04 06:23:21) postgres=# SELECT array_to_string(array_agg(s), '-') FROM s GROUP BY random() > .5;
┌──────────────────┐
│ array_to_string │
╞══════════════════╡
│ 1-2 │
│ 3-4-5-6-7-8-9-10 │
└──────────────────┘
(2 rows)
performance comparison on 1mil rows (but with enabled asserts), I modified the query for returning 100 groups because building extra long strings are slow
(2023-03-04 06:33:04) postgres=# EXPLAIN ANALYZE SELECT array_to_string(array_agg(s), '-') FROM s GROUP BY (random() * 100)::int;
┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ QUERY PLAN │
╞═════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╡
│ GroupAggregate (cost=135257.34..165257.34 rows=1000000 width=36) (actual time=715.400..1128.007 rows=101 loops=1) │
│ Group Key: (((random() * '100'::double precision))::integer) │
│ -> Sort (cost=135257.34..137757.34 rows=1000000 width=8) (actual time=712.689..853.335 rows=1000000 loops=1) │
│ Sort Key: (((random() * '100'::double precision))::integer) │
│ Sort Method: external merge Disk: 17664kB │
│ -> Seq Scan on s (cost=0.00..21925.00 rows=1000000 width=8) (actual time=6.135..192.553 rows=1000000 loops=1) │
│ Planning Time: 0.082 ms │
│ JIT: │
│ Functions: 7 │
│ Options: Inlining false, Optimization false, Expressions true, Deforming true │
│ Timing: Generation 0.484 ms, Inlining 0.000 ms, Optimization 0.276 ms, Emission 5.877 ms, Total 6.637 ms │
│ Execution Time: 1133.816 ms │
└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
(12 rows)
┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ QUERY PLAN │
╞═════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╡
│ GroupAggregate (cost=135257.34..165257.34 rows=1000000 width=36) (actual time=715.400..1128.007 rows=101 loops=1) │
│ Group Key: (((random() * '100'::double precision))::integer) │
│ -> Sort (cost=135257.34..137757.34 rows=1000000 width=8) (actual time=712.689..853.335 rows=1000000 loops=1) │
│ Sort Key: (((random() * '100'::double precision))::integer) │
│ Sort Method: external merge Disk: 17664kB │
│ -> Seq Scan on s (cost=0.00..21925.00 rows=1000000 width=8) (actual time=6.135..192.553 rows=1000000 loops=1) │
│ Planning Time: 0.082 ms │
│ JIT: │
│ Functions: 7 │
│ Options: Inlining false, Optimization false, Expressions true, Deforming true │
│ Timing: Generation 0.484 ms, Inlining 0.000 ms, Optimization 0.276 ms, Emission 5.877 ms, Total 6.637 ms │
│ Execution Time: 1133.816 ms │
└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
(12 rows)
array_to_string(array_agg()) .. 1sec
agg_leftagg .. 27 sec
using final function can be faster
create or replace function agg_leftagg_final(anycompatiblearray)
returns text as $$
begin
return array_to_string($1, '-');
end;
$$ language plpgsql;
returns text as $$
begin
return array_to_string($1, '-');
end;
$$ language plpgsql;
CREATE AGGREGATE leftagg2(anycompatible) (SFUNC=array_append, STYPE = anycompatiblearray, INITCOND = '{}', FINALFUNC = agg_leftagg_final);
(2023-03-04 06:57:18) postgres=# EXPLAIN ANALYZE SELECT leftagg2(s) FROM s GROUP BY (random() * 100)::int;
┌───────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ QUERY PLAN │
╞═══════════════════════════════════════════════════════════════════════════════════════════════════════════════════╡
│ HashAggregate (cost=78175.00..353487.50 rows=1000000 width=36) (actual time=436.202..540.029 rows=101 loops=1) │
│ Group Key: ((random() * '100'::double precision))::integer │
│ Planned Partitions: 256 Batches: 1 Memory Usage: 11930kB │
│ -> Seq Scan on s (cost=0.00..21925.00 rows=1000000 width=8) (actual time=5.710..174.016 rows=1000000 loops=1) │
│ Planning Time: 0.231 ms │
│ JIT: │
│ Functions: 7 │
│ Options: Inlining false, Optimization false, Expressions true, Deforming true │
│ Timing: Generation 1.491 ms, Inlining 0.000 ms, Optimization 0.240 ms, Emission 5.471 ms, Total 7.202 ms │
│ Execution Time: 542.007 ms │
└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
(10 rows)
Time: 543,101 ms
┌───────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ QUERY PLAN │
╞═══════════════════════════════════════════════════════════════════════════════════════════════════════════════════╡
│ HashAggregate (cost=78175.00..353487.50 rows=1000000 width=36) (actual time=436.202..540.029 rows=101 loops=1) │
│ Group Key: ((random() * '100'::double precision))::integer │
│ Planned Partitions: 256 Batches: 1 Memory Usage: 11930kB │
│ -> Seq Scan on s (cost=0.00..21925.00 rows=1000000 width=8) (actual time=5.710..174.016 rows=1000000 loops=1) │
│ Planning Time: 0.231 ms │
│ JIT: │
│ Functions: 7 │
│ Options: Inlining false, Optimization false, Expressions true, Deforming true │
│ Timing: Generation 1.491 ms, Inlining 0.000 ms, Optimization 0.240 ms, Emission 5.471 ms, Total 7.202 ms │
│ Execution Time: 542.007 ms │
└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
(10 rows)
Time: 543,101 ms
It is a little bit surprising so significantly leftagg2 is faster than the array_to_string(array_agg()) variant.
Regards
Pavel
this is why I fell in love with postgres 20 years ago, and never looked backmerlion
pgsql-general by date: