Problem with duplicate rows when FULL OUTER JOIN'ing 3 derived tables - Mailing list pgsql-sql
From | Andreas Joseph Krogh |
---|---|
Subject | Problem with duplicate rows when FULL OUTER JOIN'ing 3 derived tables |
Date | |
Msg-id | VisenaEmail.1a8.caad51b689a61ba0.1468f5ddf36@tc7-on Whole thread Raw |
List | pgsql-sql |
Hi all.
(complete schame with example-data as INSERT on bottom)
I have the need to show a report which is generated using 3 derived tables (sub-queries). For the sake of this example let's assume it a list off companies with projects and the quantity of fruit for each fruit in each company/project. A fruit can belong to a company and optionally a project, and a project can be used with any company.
I have this data.
apples on each project/company:
# | comp_name | proj_name | sum |
1 | C1 | NULL | 2 |
2 | C1 | P1 | 5 |
3 | C1 | P2 | 2 |
4 | C2 | P1 | 3 |
5 | C2 | P2 | 3 |
bananas on each project/company:
# | comp_name | proj_name | sum |
1 | C1 | NULL | 2 |
2 | C1 | P1 | 12 |
3 | C3 | NULL | 8 |
pineapples on each project/company:
# | comp_name | proj_name | sum |
1 | C1 | NULL | 10 |
2 | C2 | NULL | 10 |
I have this query but it produces lots of logically equivalent rows (same company with no project repeated times, one for each fruit):
-- Company1
insert into company(id, name) values(1, 'C1');
insert into project(id, name) values(1, 'P1');
insert into project(id, name) values(2, 'P2');
-- Company2
insert into company(id, name) values(2, 'C2');
insert into project(id, name) values(3, 'P3');
insert into project(id, name) values(4, 'P4');
-- Company3
insert into company(id, name) values(3, 'C3');
insert into project(id, name) values(5, 'P5');
insert into banana(qty, company_id) values(8, 3);
-- List all apples for projects and compaies
select c.name as comp_name, proj.name as proj_name, sum(ap.qty)
from apple ap
JOIN company c ON ap.company_id = c.id
left outer join project proj ON ap.project_id = proj.id
group by c.id, proj.id
order by comp_name ASC, proj_name ASC nulls first;
-- List all bananas for projects and compaies
select c.name as comp_name, proj.name as proj_name, sum(b.qty)
from banana b
JOIN company c ON b.company_id = c.id
left outer join project proj ON b.project_id = proj.id
group by c.id, proj.id
order by comp_name ASC, proj_name ASC nulls first;
-- List all pineapples for projects and compaies
select c.name as comp_name, proj.name as proj_name, sum(pn.qty)
from pineapple pn
JOIN company c ON pn.company_id = c.id
left outer join project proj ON pn.project_id = proj.id
group by c.id, proj.id
order by comp_name ASC, proj_name ASC nulls first;
-- Try to list all. Result is kind of correct but has many logically duplicate rows,
-- and on version of "company-name" and "project-name" for all 3 fruits
select apl.company_name as apl_company_name
, apl.project_name as apl_project_name
, apl.qty as num_apples
, bns.company_name as bns_company_name
, bns.project_name as bns_project_name
, bns.qty as num_bananas
, pns.company_name as pns_company_name
, pns.project_name as pns_project_name
, pns.qty as num_pineapples
select apl.company_name as apl_company_name
, apl.project_name as apl_project_name
, apl.qty as num_apples
, bns.company_name as bns_company_name
, bns.project_name as bns_project_name
, bns.qty as num_bananas
, pns.company_name as pns_company_name
, pns.project_name as pns_project_name
, pns.qty as num_pineapples
, apl.project_name as apl_project_name
, apl.qty as num_apples
, bns.company_name as bns_company_name
, bns.project_name as bns_project_name
, bns.qty as num_bananas
, pns.company_name as pns_company_name
, pns.project_name as pns_project_name
, pns.qty as num_pineapples
FROM
(
select c.id, c.name, proj.id, proj.name, sum(ap.qty)
from apple ap
JOIN company c ON ap.company_id = c.id
left outer join project proj ON ap.project_id = proj.id
group by c.id, proj.id
(
select c.id, c.name, proj.id, proj.name, sum(ap.qty)
from apple ap
JOIN company c ON ap.company_id = c.id
left outer join project proj ON ap.project_id = proj.id
group by c.id, proj.id
) as apl (company_id, company_name, project_id, project_name, qty)
FULL OUTER JOIN (
select c.id, c.name, proj.id, proj.name, sum(b.qty)
from banana b
JOIN company c ON b.company_id = c.id
left outer join project proj ON b.project_id = proj.id
group by c.id, proj.id
select c.id, c.name, proj.id, proj.name, sum(b.qty)
from banana b
JOIN company c ON b.company_id = c.id
left outer join project proj ON b.project_id = proj.id
group by c.id, proj.id
) as bns (company_id, company_name, project_id, project_name, qty)
ON apl.company_id = bns.company_id AND apl.project_id = bns.project_id
ON apl.company_id = bns.company_id AND apl.project_id = bns.project_id
FULL OUTER JOIN (
select c.id, c.name, proj.id, proj.name, sum(pn.qty)
from pineapple pn
JOIN company c ON pn.company_id = c.id
left outer join project proj ON pn.project_id = proj.id
group by c.id, proj.id
select c.id, c.name, proj.id, proj.name, sum(pn.qty)
from pineapple pn
JOIN company c ON pn.company_id = c.id
left outer join project proj ON pn.project_id = proj.id
group by c.id, proj.id
) as pns (company_id, company_name, project_id, project_name, qty)
ON apl.company_id = pns.company_id AND apl.project_id = pns.project_id
ON apl.company_id = pns.company_id AND apl.project_id = pns.project_id
order by coalesce(apl.company_name, bns.company_name, pns.company_name) ASC
, coalesce(apl.project_name, bns.project_name, pns.project_name) ASC nulls first
;
, coalesce(apl.project_name, bns.project_name, pns.project_name) ASC nulls first
;
# | apl_company_name | apl_project_name | num_apples | bns_company_name | bns_project_name | num_bananas | pns_company_name | pns_project_name | num_pineapples |
1 | NULL | NULL | NULL | C1 | NULL | 2 | NULL | NULL | NULL |
2 | C1 | NULL | 2 | NULL | NULL | NULL | NULL | NULL | NULL |
3 | NULL | NULL | NULL | NULL | NULL | NULL | C1 | NULL | 10 |
4 | C1 | P1 | 5 | C1 | P1 | 12 | NULL | NULL | NULL |
5 | C1 | P2 | 2 | NULL | NULL | NULL | NULL | NULL | NULL |
6 | NULL | NULL | NULL | NULL | NULL | NULL | C2 | NULL | 10 |
7 | C2 | P1 | 3 | NULL | NULL | NULL | NULL | NULL | NULL |
8 | C2 | P2 | 3 | NULL | NULL | NULL | NULL | NULL | NULL |
9 | NULL | NULL | NULL | C3 | NULL | 8 | NULL | NULL | NULL |
As you see in the above result row 1, 2 and 3 all represent company C1 without project and with apples=2, bananas=2, pineapples=10.
I'd like the output of a full report to look like:
# | company_name | project_name | num_apples | num_bananas | num_pineapples |
1 | C1 | NULL | 2 | 2 | 10 |
2 | C1 | P1 | 5 | 12 | NULL |
3 | C1 | P2 | 2 | NULL | NULL |
4 | C2 | NULL | NULL | NULL | 10 |
5 | C2 | P1 | 3 | NULL | NULL |
6 | C2 | P2 | 3 | NULL | NULL |
7 | C3 | NULL | NULL | 8 | NULL |
I get this with this query:
select coalesce(apl.company_name, bns.company_name, pns.company_name) as company_name
, coalesce(apl.project_name, bns.project_name, pns.project_name) as project_name
, apl.qty as num_apples
, bns.qty as num_bananas
, pns.qty as num_pineapples
, coalesce(apl.project_name, bns.project_name, pns.project_name) as project_name
, apl.qty as num_apples
, bns.qty as num_bananas
, pns.qty as num_pineapples
FROM
(
select c.id, c.name, proj.id, proj.name, sum(ap.qty)
from apple ap
JOIN company c ON ap.company_id = c.id
left outer join project proj ON ap.project_id = proj.id
group by c.id, proj.id
(
select c.id, c.name, proj.id, proj.name, sum(ap.qty)
from apple ap
JOIN company c ON ap.company_id = c.id
left outer join project proj ON ap.project_id = proj.id
group by c.id, proj.id
) as apl (company_id, company_name, project_id, project_name, qty)
FULL OUTER JOIN (
select c.id, c.name, proj.id, proj.name, sum(b.qty)
from banana b
JOIN company c ON b.company_id = c.id
left outer join project proj ON b.project_id = proj.id
group by c.id, proj.id
select c.id, c.name, proj.id, proj.name, sum(b.qty)
from banana b
JOIN company c ON b.company_id = c.id
left outer join project proj ON b.project_id = proj.id
group by c.id, proj.id
) as bns (company_id, company_name, project_id, project_name, qty)
ON apl.company_id = bns.company_id
AND (apl.project_id IS NULL AND bns.project_id IS NULL OR apl.project_id = bns.project_id )
ON apl.company_id = bns.company_id
AND (apl.project_id IS NULL AND bns.project_id IS NULL OR apl.project_id = bns.project_id )
FULL OUTER JOIN (
select c.id, c.name, proj.id, proj.name, sum(pn.qty)
from pineapple pn
JOIN company c ON pn.company_id = c.id
left outer join project proj ON pn.project_id = proj.id
group by c.id, proj.id
select c.id, c.name, proj.id, proj.name, sum(pn.qty)
from pineapple pn
JOIN company c ON pn.company_id = c.id
left outer join project proj ON pn.project_id = proj.id
group by c.id, proj.id
) as pns (company_id, company_name, project_id, project_name, qty)
ON apl.company_id = pns.company_id
AND (apl.project_id IS NULL AND pns.project_id IS NULL OR apl.project_id = pns.project_id)
ON apl.company_id = pns.company_id
AND (apl.project_id IS NULL AND pns.project_id IS NULL OR apl.project_id = pns.project_id)
order by company_name ASC, project_name ASC nulls first
;
;
I have some questions:
- Is FULL OUTER JOIN the correct way to handle this kind of report-query?
- Is the JOIN-clause correct?
- Is there a better way to avoid logically duplicate rows then the AND in the ON-clause for each FULL OUTER JOIN?
- Is there a way to avoid having to use coalesce to list the project and company-names?
PS: Are questions like these better suited for StackOverflow or should I post them here?
Thanks.
Here is the complete SQL for the example:
drop table if exists pineapple;
drop table if exists banana;
drop table if exists apple;
drop table if exists project;
drop table if exists company;
drop table if exists banana;
drop table if exists apple;
drop table if exists project;
drop table if exists company;
create table company(
id integer primary key,
name varchar not null unique
);
id integer primary key,
name varchar not null unique
);
create table project(
id integer primary key,
name varchar not null unique
);
id integer primary key,
name varchar not null unique
);
create table apple(
id serial primary key,
qty integer not null,
company_id integer NOT NULL references company(id),
project_id integer references project(id)
);
id serial primary key,
qty integer not null,
company_id integer NOT NULL references company(id),
project_id integer references project(id)
);
create table banana(
id serial primary key,
qty integer not null,
company_id integer NOT NULL references company(id),
project_id integer references project(id)
);
id serial primary key,
qty integer not null,
company_id integer NOT NULL references company(id),
project_id integer references project(id)
);
create table pineapple(
id serial primary key,
qty integer not null,
company_id integer NOT NULL references company(id),
project_id integer references project(id)
);
id serial primary key,
qty integer not null,
company_id integer NOT NULL references company(id),
project_id integer references project(id)
);
-- Company1
insert into company(id, name) values(1, 'C1');
insert into project(id, name) values(1, 'P1');
insert into project(id, name) values(2, 'P2');
insert into apple(qty, company_id) values(2, 1);
insert into apple(qty, company_id, project_id) values(3, 1, 1);
insert into apple(qty, company_id, project_id) values(2, 1, 1);
insert into apple(qty, company_id, project_id) values(2, 1, 2);
insert into apple(qty, company_id, project_id) values(3, 1, 1);
insert into apple(qty, company_id, project_id) values(2, 1, 1);
insert into apple(qty, company_id, project_id) values(2, 1, 2);
insert into banana(qty, company_id) values(2, 1);
insert into banana(qty, company_id, project_id) values(6, 1, 1);
insert into banana(qty, company_id, project_id) values(6, 1, 1);
insert into banana(qty, company_id, project_id) values(6, 1, 1);
insert into banana(qty, company_id, project_id) values(6, 1, 1);
insert into pineapple(qty, company_id) values(10, 1);
-- Company2
insert into company(id, name) values(2, 'C2');
insert into project(id, name) values(3, 'P3');
insert into project(id, name) values(4, 'P4');
insert into apple(qty, company_id, project_id) values(3, 2, 1);
insert into apple(qty, company_id, project_id) values(3, 2, 2);
insert into apple(qty, company_id, project_id) values(3, 2, 2);
insert into pineapple(qty, company_id) values(10, 2);
-- Company3
insert into company(id, name) values(3, 'C3');
insert into project(id, name) values(5, 'P5');
insert into banana(qty, company_id) values(8, 3);
-- List all apples for projects and compaies
select c.name as comp_name, proj.name as proj_name, sum(ap.qty)
from apple ap
JOIN company c ON ap.company_id = c.id
left outer join project proj ON ap.project_id = proj.id
group by c.id, proj.id
order by comp_name ASC, proj_name ASC nulls first;
-- List all bananas for projects and compaies
select c.name as comp_name, proj.name as proj_name, sum(b.qty)
from banana b
JOIN company c ON b.company_id = c.id
left outer join project proj ON b.project_id = proj.id
group by c.id, proj.id
order by comp_name ASC, proj_name ASC nulls first;
-- List all pineapples for projects and compaies
select c.name as comp_name, proj.name as proj_name, sum(pn.qty)
from pineapple pn
JOIN company c ON pn.company_id = c.id
left outer join project proj ON pn.project_id = proj.id
group by c.id, proj.id
order by comp_name ASC, proj_name ASC nulls first;
-- Try to list all. Result is kind of correct but has many logically duplicate rows,
-- and on version of "company-name" and "project-name" for all 3 fruits
select apl.company_name as apl_company_name
, apl.project_name as apl_project_name
, apl.qty as num_apples
, bns.company_name as bns_company_name
, bns.project_name as bns_project_name
, bns.qty as num_bananas
, pns.company_name as pns_company_name
, pns.project_name as pns_project_name
, pns.qty as num_pineapples
FROM
(
select c.id, c.name, proj.id, proj.name, sum(ap.qty)
from apple ap
JOIN company c ON ap.company_id = c.id
left outer join project proj ON ap.project_id = proj.id
group by c.id, proj.id
(
select c.id, c.name, proj.id, proj.name, sum(ap.qty)
from apple ap
JOIN company c ON ap.company_id = c.id
left outer join project proj ON ap.project_id = proj.id
group by c.id, proj.id
) as apl (company_id, company_name, project_id, project_name, qty)
FULL OUTER JOIN (
select c.id, c.name, proj.id, proj.name, sum(b.qty)
from banana b
JOIN company c ON b.company_id = c.id
left outer join project proj ON b.project_id = proj.id
group by c.id, proj.id
select c.id, c.name, proj.id, proj.name, sum(b.qty)
from banana b
JOIN company c ON b.company_id = c.id
left outer join project proj ON b.project_id = proj.id
group by c.id, proj.id
) as bns (company_id, company_name, project_id, project_name, qty)
ON apl.company_id = bns.company_id AND apl.project_id = bns.project_id
ON apl.company_id = bns.company_id AND apl.project_id = bns.project_id
FULL OUTER JOIN (
select c.id, c.name, proj.id, proj.name, sum(pn.qty)
from pineapple pn
JOIN company c ON pn.company_id = c.id
left outer join project proj ON pn.project_id = proj.id
group by c.id, proj.id
select c.id, c.name, proj.id, proj.name, sum(pn.qty)
from pineapple pn
JOIN company c ON pn.company_id = c.id
left outer join project proj ON pn.project_id = proj.id
group by c.id, proj.id
) as pns (company_id, company_name, project_id, project_name, qty)
ON apl.company_id = pns.company_id AND apl.project_id = pns.project_id
ON apl.company_id = pns.company_id AND apl.project_id = pns.project_id
order by coalesce(apl.company_name, bns.company_name, pns.company_name) ASC
, coalesce(apl.project_name, bns.project_name, pns.project_name) ASC nulls first
;
, coalesce(apl.project_name, bns.project_name, pns.project_name) ASC nulls first
;
-- Try filtering out NULLs, seems to work but unsure if it's safe
-- Is there a way to avoid coalescing the name-columns?
select coalesce(apl.company_name, bns.company_name, pns.company_name) as company_name
, coalesce(apl.project_name, bns.project_name, pns.project_name) as project_name
, apl.qty as num_apples
, bns.qty as num_bananas
, pns.qty as num_pineapples
-- Is there a way to avoid coalescing the name-columns?
select coalesce(apl.company_name, bns.company_name, pns.company_name) as company_name
, coalesce(apl.project_name, bns.project_name, pns.project_name) as project_name
, apl.qty as num_apples
, bns.qty as num_bananas
, pns.qty as num_pineapples
FROM
(
select c.id, c.name, proj.id, proj.name, sum(ap.qty)
from apple ap
JOIN company c ON ap.company_id = c.id
left outer join project proj ON ap.project_id = proj.id
group by c.id, proj.id
(
select c.id, c.name, proj.id, proj.name, sum(ap.qty)
from apple ap
JOIN company c ON ap.company_id = c.id
left outer join project proj ON ap.project_id = proj.id
group by c.id, proj.id
) as apl (company_id, company_name, project_id, project_name, qty)
FULL OUTER JOIN (
select c.id, c.name, proj.id, proj.name, sum(b.qty)
from banana b
JOIN company c ON b.company_id = c.id
left outer join project proj ON b.project_id = proj.id
group by c.id, proj.id
select c.id, c.name, proj.id, proj.name, sum(b.qty)
from banana b
JOIN company c ON b.company_id = c.id
left outer join project proj ON b.project_id = proj.id
group by c.id, proj.id
) as bns (company_id, company_name, project_id, project_name, qty)
ON apl.company_id = bns.company_id
AND (apl.project_id IS NULL AND bns.project_id IS NULL OR apl.project_id = bns.project_id )
ON apl.company_id = bns.company_id
AND (apl.project_id IS NULL AND bns.project_id IS NULL OR apl.project_id = bns.project_id )
FULL OUTER JOIN (
select c.id, c.name, proj.id, proj.name, sum(pn.qty)
from pineapple pn
JOIN company c ON pn.company_id = c.id
left outer join project proj ON pn.project_id = proj.id
group by c.id, proj.id
select c.id, c.name, proj.id, proj.name, sum(pn.qty)
from pineapple pn
JOIN company c ON pn.company_id = c.id
left outer join project proj ON pn.project_id = proj.id
group by c.id, proj.id
) as pns (company_id, company_name, project_id, project_name, qty)
ON apl.company_id = pns.company_id
AND (apl.project_id IS NULL AND pns.project_id IS NULL OR apl.project_id = pns.project_id)
ON apl.company_id = pns.company_id
AND (apl.project_id IS NULL AND pns.project_id IS NULL OR apl.project_id = pns.project_id)
order by company_name ASC, project_name ASC nulls first
;
;
--
Andreas Jospeh Krogh
CTO / Partner - Visena AS
Mobile: +47 909 56 963