Thread: Using aggregates to get sums and partial sums in one query
drop table if exists t_member_budget; drop table if exists t_member; drop table if exists t_project; create table t_project( id integer primary key, name varchar not null ); create table t_member( id integer primary key, person_id integer not null, project_id integer not null REFERENCES t_project(id), role varchar not null, years_experience integer not null ); create table t_member_budget( id integer PRIMARY KEY, member_id integer not null REFERENCES t_member(id), month date not null, amount integer not null ); insert into t_project(id, name) values(1, 'P1'); insert into t_project(id, name) values(2, 'P2'); -- Person 1,2,3 is member of project 1. Person 1 has 2 roles - so 2 entries insert into t_member(id, person_id, project_id, role, years_experience) values(1, 1, 1, 'LEADER', 3); -- person 1 member of project 1 as LEADER insert into t_member(id, person_id, project_id, role, years_experience) values(2, 1, 1, 'MEMBER', 7); -- person 1 member of project 1 as MEMBER insert into t_member(id, person_id, project_id, role, years_experience) values(3, 2, 1, 'LEADER', 1); -- person 2 member of project 1 insert into t_member(id, person_id, project_id, role, years_experience) values(4, 3, 1, 'LEADER', 6); -- person 3 member of project 1 -- Person 3 is member of project 2 insert into t_member(id, person_id, project_id, role, years_experience) values(5, 3, 2, 'LEADER', 5); -- person 3 member of project 2 insert into t_member_budget(id, member_id, month, amount) values(1, 1, '2014-01-01'::date, 2); -- person 1 in project 1 as LEADER insert into t_member_budget(id, member_id, month, amount) values(2, 1, '2014-02-01'::date, 3); -- person 1 in project 1 as LEADER insert into t_member_budget(id, member_id, month, amount) values(3, 1, '2014-03-01'::date, 2); -- person 1 in project 1 as LEADER insert into t_member_budget(id, member_id, month, amount) values(4, 2, '2014-01-01'::date, 2); -- person 1 in project 1 as MEMBER insert into t_member_budget(id, member_id, month, amount) values(5, 3, '2014-01-01'::date, 2); -- person 2 in project 1 insert into t_member_budget(id, member_id, month, amount) values(6, 4, '2014-01-01'::date, 2); -- person 3 in project 1 insert into t_member_budget(id, member_id, month, amount) values(7, 5, '2014-01-01'::date, 4); -- person 3 in project 2 insert into t_member_budget(id, member_id, month, amount) values(8, 5, '2014-01-01'::date, 2); -- person 3 in project 2
------+----------------------+-----------+----------------+-------------------
P1 | 17 | 4 | 3 | 13
P2 | 5 | 1 | 1 | 6
select p.name, sum(years_experience) as years_exp_in_project, count(role) as num_roles , count(distinct m.person_id) as unique_members, sum(g.amount) as sum_budget_amount from t_project p JOIN t_member m ON m.project_id = p.id join t_member_budget g ON g.member_id = m.id group by p.id order by p.name;
------+----------------------+-----------+----------------+-------------------
P1 | 23 | 6 | 3 | 13
P2 | 10 | 2 | 1 | 6
Attachment
From: pgsql-sql-owner@postgresql.org [mailto:pgsql-sql-owner@postgresql.org] On Behalf Of Andreas Joseph Krogh
Sent: Friday, November 07, 2014 8:19 PM
To: pgsql-sql@postgresql.org
Subject: [SQL] Using aggregates to get sums and partial sums in one query
Hi all.
I'm trying to retrieve some aggregated sums in one query.
This is a short example of what I'm trying to achieve.
The schema (not very good names, I know):
drop table if exists t_member_budget;
drop table if exists t_member;
drop table if exists t_project;
create table t_project(
id integer primary key,
name varchar not null
);
create table t_member(
id integer primary key,
person_id integer not null,
project_id integer not null REFERENCES t_project(id),
role varchar not null,
years_experience integer not null
);
create table t_member_budget(
id integer PRIMARY KEY,
member_id integer not null REFERENCES t_member(id),
month date not null,
amount integer not null
);
insert into t_project(id, name) values(1, 'P1');
insert into t_project(id, name) values(2, 'P2');
-- Person 1,2,3 is member of project 1. Person 1 has 2 roles - so 2 entries
insert into t_member(id, person_id, project_id, role, years_experience)
values(1, 1, 1, 'LEADER', 3); -- person 1 member of project 1 as LEADER
insert into t_member(id, person_id, project_id, role, years_experience)
values(2, 1, 1, 'MEMBER', 7); -- person 1 member of project 1 as MEMBER
insert into t_member(id, person_id, project_id, role, years_experience)
values(3, 2, 1, 'LEADER', 1); -- person 2 member of project 1
insert into t_member(id, person_id, project_id, role, years_experience)
values(4, 3, 1, 'LEADER', 6); -- person 3 member of project 1
-- Person 3 is member of project 2
insert into t_member(id, person_id, project_id, role, years_experience)
values(5, 3, 2, 'LEADER', 5); -- person 3 member of project 2
insert into t_member_budget(id, member_id, month, amount) values(1, 1, '2014-01-01'::date, 2); -- person 1 in project 1 as LEADER
insert into t_member_budget(id, member_id, month, amount) values(2, 1, '2014-02-01'::date, 3); -- person 1 in project 1 as LEADER
insert into t_member_budget(id, member_id, month, amount) values(3, 1, '2014-03-01'::date, 2); -- person 1 in project 1 as LEADER
insert into t_member_budget(id, member_id, month, amount) values(4, 2, '2014-01-01'::date, 2); -- person 1 in project 1 as MEMBER
insert into t_member_budget(id, member_id, month, amount) values(5, 3, '2014-01-01'::date, 2); -- person 2 in project 1
insert into t_member_budget(id, member_id, month, amount) values(6, 4, '2014-01-01'::date, 2); -- person 3 in project 1
insert into t_member_budget(id, member_id, month, amount) values(7, 5, '2014-01-01'::date, 4); -- person 3 in project 2
insert into t_member_budget(id, member_id, month, amount) values(8, 5, '2014-01-01'::date, 2); -- person 3 in project 2
Then what I'm trying to get out is this dataset:
name | years_exp_in_project | num_roles | unique_members | sum_budget_amount
------+----------------------+-----------+----------------+-------------------
P1 | 17 | 4 | 3 | 13
P2 | 5 | 1 | 1 | 6
This query kind of illustrates what I'm after, but gives the wrong results, of course:
select p.name, sum(years_experience) as years_exp_in_project, count(role) as num_roles
, count(distinct m.person_id) as unique_members, sum(g.amount) as sum_budget_amount
from t_project p JOIN t_member m ON m.project_id = p.id
join t_member_budget g ON g.member_id = m.id
group by p.id
order by p.name;
This gives me:
name | years_exp_in_project | num_roles | unique_members | sum_budget_amount
------+----------------------+-----------+----------------+-------------------
P1 | 23 | 6 | 3 | 13
P2 | 10 | 2 | 1 | 6
Which obviously is wrong.
I know I can craft a query which uses sub-queries and CTE to get the desired results, but I hope there is cleaner solution.
Any idea how to craft a neat query for this without sub-queries or CTE? Will Windowing-functions help out here?
Thanks.
--
I’m a bit late to this discussion.
But, if you still don’t have an answer, try this:
select p.name, sum(distinct m.years_experience) as years_exp_in_project, count(distinct (m.id, m.role))
, count(distinct m.person_id) as unique_members, sum(g.amount) as sum_budget_amount
from t_project p JOIN t_member m ON m.project_id = p.id
join t_member_budget g ON g.member_id = m.id
group by p.id
order by p.name;
Regards,
Igor Neyman
Igor Neyman wrote > select p.name, sum(distinct m.years_experience) as years_exp_in_project, This is wrong on its face; two different people with the same experience will result in under-counting You need to figure out some way for the DISTINCT to include a personID and then just sum up the year_experience for each individual. My initial thought was to use "ARRAY_AGG(DISTINCT composite_type)" to construct the unique dataset then pass the result through a custom function that would unnest(...) that array, pull out the years, sum them, and return the sum. > count(distinct (m.id, m.role)) Not technically a true role count if two people share the same role - it is unclear from the query what constraints the problem domain imposes. This can be a solution for this column. I don't really see how using a couple of CTEs to build up summaries for each table and then joining them together in an outer query is problematic. TBH it would probably be easier to maintain than one single super query with a bunch of distinct aggregates; and I doubt there would be much if any performance hit. David J. -- View this message in context: http://postgresql.1045698.n5.nabble.com/Using-aggregates-to-get-sums-and-partial-sums-in-one-query-tp5826157p5826346.html Sent from the PostgreSQL - sql mailing list archive at Nabble.com.
Igor Neyman wrote
> select p.name, sum(distinct m.years_experience) as years_exp_in_project,
This is wrong on its face; two different people with the same experience
will result in under-counting
You need to figure out some way for the DISTINCT to include a personID and
then just sum up the year_experience for each individual. My initial
thought was to use "ARRAY_AGG(DISTINCT composite_type)" to construct the
unique dataset then pass the result through a custom function that would
unnest(...) that array, pull out the years, sum them, and return the sum.
> count(distinct (m.id, m.role))
Not technically a true role count if two people share the same role - it is
unclear from the query what constraints the problem domain imposes. This
can be a solution for this column.
I don't really see how using a couple of CTEs to build up summaries for each
table and then joining them together in an outer query is problematic. TBH
it would probably be easier to maintain than one single super query with a
bunch of distinct aggregates; and I doubt there would be much if any
performance hit.