Thread: Problem with duplicate rows when FULL OUTER JOIN'ing 3 derived tables
Problem with duplicate rows when FULL OUTER JOIN'ing 3 derived tables
From
Andreas Joseph Krogh
Date:
(Sorry for posting again, but this time with more readable tables)
Hi all.
(complete schame with example-data as INSERT on bottom)
I have the need to show a report which is generated using 3 derived tables (sub-queries). For the sake of this example let's assume it a list off companies with projects and the quantity of fruit for each fruit in each company/project. A fruit can belong to a company and optionally a project, and a project can be used with any company.
I have this data.
apples on each project/company:
# | comp_name | proj_name | sum |
1 | C1 | NULL | 2 |
2 | C1 | P1 | 5 |
3 | C1 | P2 | 2 |
4 | C2 | P1 | 3 |
5 | C2 | P2 | 3 |
bananas on each project/company:
# | comp_name | proj_name | sum |
1 | C1 | NULL | 2 |
2 | C1 | P1 | 12 |
3 | C3 | NULL | 8 |
pineapples on each project/company:
# | comp_name | proj_name | sum |
1 | C1 | NULL | 10 |
2 | C2 | NULL | 10 |
I have this query but it produces lots of logically equivalent rows (same company with no project repeated times, one for each fruit):
I get this with this query:
-- Company1
insert into company(id, name) values(1, 'C1');
insert into project(id, name) values(1, 'P1');
insert into project(id, name) values(2, 'P2');
-- Company2
insert into company(id, name) values(2, 'C2');
insert into project(id, name) values(3, 'P3');
insert into project(id, name) values(4, 'P4');
-- Company3
insert into company(id, name) values(3, 'C3');
insert into project(id, name) values(5, 'P5');
insert into banana(qty, company_id) values(8, 3);
-- List all apples for projects and compaies
select c.name as comp_name, proj.name as proj_name, sum(ap.qty)
from apple ap
JOIN company c ON ap.company_id = c.id
left outer join project proj ON ap.project_id = proj.id
group by c.id, proj.id
order by comp_name ASC, proj_name ASC nulls first;
-- List all bananas for projects and compaies
select c.name as comp_name, proj.name as proj_name, sum(b.qty)
from banana b
JOIN company c ON b.company_id = c.id
left outer join project proj ON b.project_id = proj.id
group by c.id, proj.id
order by comp_name ASC, proj_name ASC nulls first;
-- List all pineapples for projects and compaies
select c.name as comp_name, proj.name as proj_name, sum(pn.qty)
from pineapple pn
JOIN company c ON pn.company_id = c.id
left outer join project proj ON pn.project_id = proj.id
group by c.id, proj.id
order by comp_name ASC, proj_name ASC nulls first;
-- Try to list all. Result is kind of correct but has many logically duplicate rows,
-- and on version of "company-name" and "project-name" for all 3 fruits
select apl.company_name as apl_company_name
, apl.project_name as apl_project_name
, apl.qty as num_apples
, bns.company_name as bns_company_name
, bns.project_name as bns_project_name
, bns.qty as num_bananas
, pns.company_name as pns_company_name
, pns.project_name as pns_project_name
, pns.qty as num_pineapples
select apl.company_name as apl_company_name
, apl.project_name as apl_project_name
, apl.qty as num_apples
, bns.company_name as bns_company_name
, bns.project_name as bns_project_name
, bns.qty as num_bananas
, pns.company_name as pns_company_name
, pns.project_name as pns_project_name
, pns.qty as num_pineapples
, apl.project_name as apl_project_name
, apl.qty as num_apples
, bns.company_name as bns_company_name
, bns.project_name as bns_project_name
, bns.qty as num_bananas
, pns.company_name as pns_company_name
, pns.project_name as pns_project_name
, pns.qty as num_pineapples
FROM
(
select c.id, c.name, proj.id, proj.name, sum(ap.qty)
from apple ap
JOIN company c ON ap.company_id = c.id
left outer join project proj ON ap.project_id = proj.id
group by c.id, proj.id
(
select c.id, c.name, proj.id, proj.name, sum(ap.qty)
from apple ap
JOIN company c ON ap.company_id = c.id
left outer join project proj ON ap.project_id = proj.id
group by c.id, proj.id
) as apl (company_id, company_name, project_id, project_name, qty)
FULL OUTER JOIN (
select c.id, c.name, proj.id, proj.name, sum(b.qty)
from banana b
JOIN company c ON b.company_id = c.id
left outer join project proj ON b.project_id = proj.id
group by c.id, proj.id
select c.id, c.name, proj.id, proj.name, sum(b.qty)
from banana b
JOIN company c ON b.company_id = c.id
left outer join project proj ON b.project_id = proj.id
group by c.id, proj.id
) as bns (company_id, company_name, project_id, project_name, qty)
ON apl.company_id = bns.company_id AND apl.project_id = bns.project_id
ON apl.company_id = bns.company_id AND apl.project_id = bns.project_id
FULL OUTER JOIN (
select c.id, c.name, proj.id, proj.name, sum(pn.qty)
from pineapple pn
JOIN company c ON pn.company_id = c.id
left outer join project proj ON pn.project_id = proj.id
group by c.id, proj.id
select c.id, c.name, proj.id, proj.name, sum(pn.qty)
from pineapple pn
JOIN company c ON pn.company_id = c.id
left outer join project proj ON pn.project_id = proj.id
group by c.id, proj.id
) as pns (company_id, company_name, project_id, project_name, qty)
ON apl.company_id = pns.company_id AND apl.project_id = pns.project_id
ON apl.company_id = pns.company_id AND apl.project_id = pns.project_id
order by coalesce(apl.company_name, bns.company_name, pns.company_name) ASC
, coalesce(apl.project_name, bns.project_name, pns.project_name) ASC nulls first
;
, coalesce(apl.project_name, bns.project_name, pns.project_name) ASC nulls first
;
# | apl_company_name | apl_project_name | num_apples | bns_company_name | bns_project_name | num_bananas | pns_company_name | pns_project_name | num_pineapples |
1 | NULL | NULL | NULL | C1 | NULL | 2 | NULL | NULL | NULL |
2 | C1 | NULL | 2 | NULL | NULL | NULL | NULL | NULL | NULL |
3 | NULL | NULL | NULL | NULL | NULL | NULL | C1 | NULL | 10 |
4 | C1 | P1 | 5 | C1 | P1 | 12 | NULL | NULL | NULL |
5 | C1 | P2 | 2 | NULL | NULL | NULL | NULL | NULL | NULL |
6 | NULL | NULL | NULL | NULL | NULL | NULL | C2 | NULL | 10 |
7 | C2 | P1 | 3 | NULL | NULL | NULL | NULL | NULL | NULL |
8 | C2 | P2 | 3 | NULL | NULL | NULL | NULL | NULL | NULL |
9 | NULL | NULL | NULL | C3 | NULL | 8 | NULL | NULL | NULL |
As you see in the above result row 1, 2 and 3 all represent company C1 without project and with apples=2, bananas=2, pineapples=10.
I'd like the output of a full report to look like:
# | company_name | project_name | num_apples | num_bananas | num_pineapples |
1 | C1 | NULL | 2 | 2 | 10 |
2 | C1 | P1 | 5 | 12 | NULL |
3 | C1 | P2 | 2 | NULL | NULL |
4 | C2 | NULL | NULL | NULL | 10 |
5 | C2 | P1 | 3 | NULL | NULL |
6 | C2 | P2 | 3 | NULL | NULL |
7 | C3 | NULL | NULL | 8 | NULL |
select coalesce(apl.company_name, bns.company_name, pns.company_name) as company_name
, coalesce(apl.project_name, bns.project_name, pns.project_name) as project_name
, apl.qty as num_apples
, bns.qty as num_bananas
, pns.qty as num_pineapples
, coalesce(apl.project_name, bns.project_name, pns.project_name) as project_name
, apl.qty as num_apples
, bns.qty as num_bananas
, pns.qty as num_pineapples
FROM
(
select c.id, c.name, proj.id, proj.name, sum(ap.qty)
from apple ap
JOIN company c ON ap.company_id = c.id
left outer join project proj ON ap.project_id = proj.id
group by c.id, proj.id
(
select c.id, c.name, proj.id, proj.name, sum(ap.qty)
from apple ap
JOIN company c ON ap.company_id = c.id
left outer join project proj ON ap.project_id = proj.id
group by c.id, proj.id
) as apl (company_id, company_name, project_id, project_name, qty)
FULL OUTER JOIN (
select c.id, c.name, proj.id, proj.name, sum(b.qty)
from banana b
JOIN company c ON b.company_id = c.id
left outer join project proj ON b.project_id = proj.id
group by c.id, proj.id
select c.id, c.name, proj.id, proj.name, sum(b.qty)
from banana b
JOIN company c ON b.company_id = c.id
left outer join project proj ON b.project_id = proj.id
group by c.id, proj.id
) as bns (company_id, company_name, project_id, project_name, qty)
ON apl.company_id = bns.company_id
AND (apl.project_id IS NULL AND bns.project_id IS NULL OR apl.project_id = bns.project_id )
ON apl.company_id = bns.company_id
AND (apl.project_id IS NULL AND bns.project_id IS NULL OR apl.project_id = bns.project_id )
FULL OUTER JOIN (
select c.id, c.name, proj.id, proj.name, sum(pn.qty)
from pineapple pn
JOIN company c ON pn.company_id = c.id
left outer join project proj ON pn.project_id = proj.id
group by c.id, proj.id
select c.id, c.name, proj.id, proj.name, sum(pn.qty)
from pineapple pn
JOIN company c ON pn.company_id = c.id
left outer join project proj ON pn.project_id = proj.id
group by c.id, proj.id
) as pns (company_id, company_name, project_id, project_name, qty)
ON apl.company_id = pns.company_id
AND (apl.project_id IS NULL AND pns.project_id IS NULL OR apl.project_id = pns.project_id)
ON apl.company_id = pns.company_id
AND (apl.project_id IS NULL AND pns.project_id IS NULL OR apl.project_id = pns.project_id)
order by company_name ASC, project_name ASC nulls first
;
;
I have some questions:
- Is FULL OUTER JOIN the correct way to handle this kind of report-query?
- Is the JOIN-clause correct?
- Is there a better way to avoid logically duplicate rows then the AND in the ON-clause for each FULL OUTER JOIN?
- Is there a way to avoid having to use coalesce to list the project and company-names?
PS: Are questions like these better suited for StackOverflow or should I post them here?
Thanks.
Here is the complete SQL for the example:
drop table if exists pineapple;
drop table if exists banana;
drop table if exists apple;
drop table if exists project;
drop table if exists company;
drop table if exists banana;
drop table if exists apple;
drop table if exists project;
drop table if exists company;
create table company(
id integer primary key,
name varchar not null unique
);
id integer primary key,
name varchar not null unique
);
create table project(
id integer primary key,
name varchar not null unique
);
id integer primary key,
name varchar not null unique
);
create table apple(
id serial primary key,
qty integer not null,
company_id integer NOT NULL references company(id),
project_id integer references project(id)
);
id serial primary key,
qty integer not null,
company_id integer NOT NULL references company(id),
project_id integer references project(id)
);
create table banana(
id serial primary key,
qty integer not null,
company_id integer NOT NULL references company(id),
project_id integer references project(id)
);
id serial primary key,
qty integer not null,
company_id integer NOT NULL references company(id),
project_id integer references project(id)
);
create table pineapple(
id serial primary key,
qty integer not null,
company_id integer NOT NULL references company(id),
project_id integer references project(id)
);
id serial primary key,
qty integer not null,
company_id integer NOT NULL references company(id),
project_id integer references project(id)
);
-- Company1
insert into company(id, name) values(1, 'C1');
insert into project(id, name) values(1, 'P1');
insert into project(id, name) values(2, 'P2');
insert into apple(qty, company_id) values(2, 1);
insert into apple(qty, company_id, project_id) values(3, 1, 1);
insert into apple(qty, company_id, project_id) values(2, 1, 1);
insert into apple(qty, company_id, project_id) values(2, 1, 2);
insert into apple(qty, company_id, project_id) values(3, 1, 1);
insert into apple(qty, company_id, project_id) values(2, 1, 1);
insert into apple(qty, company_id, project_id) values(2, 1, 2);
insert into banana(qty, company_id) values(2, 1);
insert into banana(qty, company_id, project_id) values(6, 1, 1);
insert into banana(qty, company_id, project_id) values(6, 1, 1);
insert into banana(qty, company_id, project_id) values(6, 1, 1);
insert into banana(qty, company_id, project_id) values(6, 1, 1);
insert into pineapple(qty, company_id) values(10, 1);
-- Company2
insert into company(id, name) values(2, 'C2');
insert into project(id, name) values(3, 'P3');
insert into project(id, name) values(4, 'P4');
insert into apple(qty, company_id, project_id) values(3, 2, 1);
insert into apple(qty, company_id, project_id) values(3, 2, 2);
insert into apple(qty, company_id, project_id) values(3, 2, 2);
insert into pineapple(qty, company_id) values(10, 2);
-- Company3
insert into company(id, name) values(3, 'C3');
insert into project(id, name) values(5, 'P5');
insert into banana(qty, company_id) values(8, 3);
-- List all apples for projects and compaies
select c.name as comp_name, proj.name as proj_name, sum(ap.qty)
from apple ap
JOIN company c ON ap.company_id = c.id
left outer join project proj ON ap.project_id = proj.id
group by c.id, proj.id
order by comp_name ASC, proj_name ASC nulls first;
-- List all bananas for projects and compaies
select c.name as comp_name, proj.name as proj_name, sum(b.qty)
from banana b
JOIN company c ON b.company_id = c.id
left outer join project proj ON b.project_id = proj.id
group by c.id, proj.id
order by comp_name ASC, proj_name ASC nulls first;
-- List all pineapples for projects and compaies
select c.name as comp_name, proj.name as proj_name, sum(pn.qty)
from pineapple pn
JOIN company c ON pn.company_id = c.id
left outer join project proj ON pn.project_id = proj.id
group by c.id, proj.id
order by comp_name ASC, proj_name ASC nulls first;
-- Try to list all. Result is kind of correct but has many logically duplicate rows,
-- and on version of "company-name" and "project-name" for all 3 fruits
select apl.company_name as apl_company_name
, apl.project_name as apl_project_name
, apl.qty as num_apples
, bns.company_name as bns_company_name
, bns.project_name as bns_project_name
, bns.qty as num_bananas
, pns.company_name as pns_company_name
, pns.project_name as pns_project_name
, pns.qty as num_pineapples
FROM
(
select c.id, c.name, proj.id, proj.name, sum(ap.qty)
from apple ap
JOIN company c ON ap.company_id = c.id
left outer join project proj ON ap.project_id = proj.id
group by c.id, proj.id
(
select c.id, c.name, proj.id, proj.name, sum(ap.qty)
from apple ap
JOIN company c ON ap.company_id = c.id
left outer join project proj ON ap.project_id = proj.id
group by c.id, proj.id
) as apl (company_id, company_name, project_id, project_name, qty)
FULL OUTER JOIN (
select c.id, c.name, proj.id, proj.name, sum(b.qty)
from banana b
JOIN company c ON b.company_id = c.id
left outer join project proj ON b.project_id = proj.id
group by c.id, proj.id
select c.id, c.name, proj.id, proj.name, sum(b.qty)
from banana b
JOIN company c ON b.company_id = c.id
left outer join project proj ON b.project_id = proj.id
group by c.id, proj.id
) as bns (company_id, company_name, project_id, project_name, qty)
ON apl.company_id = bns.company_id AND apl.project_id = bns.project_id
ON apl.company_id = bns.company_id AND apl.project_id = bns.project_id
FULL OUTER JOIN (
select c.id, c.name, proj.id, proj.name, sum(pn.qty)
from pineapple pn
JOIN company c ON pn.company_id = c.id
left outer join project proj ON pn.project_id = proj.id
group by c.id, proj.id
select c.id, c.name, proj.id, proj.name, sum(pn.qty)
from pineapple pn
JOIN company c ON pn.company_id = c.id
left outer join project proj ON pn.project_id = proj.id
group by c.id, proj.id
) as pns (company_id, company_name, project_id, project_name, qty)
ON apl.company_id = pns.company_id AND apl.project_id = pns.project_id
ON apl.company_id = pns.company_id AND apl.project_id = pns.project_id
order by coalesce(apl.company_name, bns.company_name, pns.company_name) ASC
, coalesce(apl.project_name, bns.project_name, pns.project_name) ASC nulls first
;
, coalesce(apl.project_name, bns.project_name, pns.project_name) ASC nulls first
;
-- Try filtering out NULLs, seems to work but unsure if it's safe
-- Is there a way to avoid coalescing the name-columns?
select coalesce(apl.company_name, bns.company_name, pns.company_name) as company_name
, coalesce(apl.project_name, bns.project_name, pns.project_name) as project_name
, apl.qty as num_apples
, bns.qty as num_bananas
, pns.qty as num_pineapples
-- Is there a way to avoid coalescing the name-columns?
select coalesce(apl.company_name, bns.company_name, pns.company_name) as company_name
, coalesce(apl.project_name, bns.project_name, pns.project_name) as project_name
, apl.qty as num_apples
, bns.qty as num_bananas
, pns.qty as num_pineapples
FROM
(
select c.id, c.name, proj.id, proj.name, sum(ap.qty)
from apple ap
JOIN company c ON ap.company_id = c.id
left outer join project proj ON ap.project_id = proj.id
group by c.id, proj.id
(
select c.id, c.name, proj.id, proj.name, sum(ap.qty)
from apple ap
JOIN company c ON ap.company_id = c.id
left outer join project proj ON ap.project_id = proj.id
group by c.id, proj.id
) as apl (company_id, company_name, project_id, project_name, qty)
FULL OUTER JOIN (
select c.id, c.name, proj.id, proj.name, sum(b.qty)
from banana b
JOIN company c ON b.company_id = c.id
left outer join project proj ON b.project_id = proj.id
group by c.id, proj.id
select c.id, c.name, proj.id, proj.name, sum(b.qty)
from banana b
JOIN company c ON b.company_id = c.id
left outer join project proj ON b.project_id = proj.id
group by c.id, proj.id
) as bns (company_id, company_name, project_id, project_name, qty)
ON apl.company_id = bns.company_id
AND (apl.project_id IS NULL AND bns.project_id IS NULL OR apl.project_id = bns.project_id )
ON apl.company_id = bns.company_id
AND (apl.project_id IS NULL AND bns.project_id IS NULL OR apl.project_id = bns.project_id )
FULL OUTER JOIN (
select c.id, c.name, proj.id, proj.name, sum(pn.qty)
from pineapple pn
JOIN company c ON pn.company_id = c.id
left outer join project proj ON pn.project_id = proj.id
group by c.id, proj.id
select c.id, c.name, proj.id, proj.name, sum(pn.qty)
from pineapple pn
JOIN company c ON pn.company_id = c.id
left outer join project proj ON pn.project_id = proj.id
group by c.id, proj.id
) as pns (company_id, company_name, project_id, project_name, qty)
ON apl.company_id = pns.company_id
AND (apl.project_id IS NULL AND pns.project_id IS NULL OR apl.project_id = pns.project_id)
ON apl.company_id = pns.company_id
AND (apl.project_id IS NULL AND pns.project_id IS NULL OR apl.project_id = pns.project_id)
order by company_name ASC, project_name ASC nulls first
;
;
--
Andreas Jospeh Krogh
CTO / Partner - Visena AS
Mobile: +47 909 56 963
Attachment
Re: Problem with duplicate rows when FULL OUTER JOIN'ing 3 derived tables
From
David G Johnston
Date:
Note - numbers do not relate to your questions. This pretty much answers 1 and 3. 1. I'd ensure that project name/id on a table can never be null by creating a dummy project that means "none assigned". 2. I would avoid full join. To do so I'd cross join a distinct list of companies with a distinct list of projects. 3. Against the join in 2 you then left join three times, once for each product table. 4. In the final result any product not having a correspond company/project would have its value coalesced to zero. The end result is a table without any NULL and, in the case of your explanatory data, 9 rows - two of which would be all zeros: (c3,p1) and (c3,p,2) If you cannot change the raw data I'd use CTE/WITH to normalize the data according to 1 and the use these CTEs in the rest of the query. I would also do 2 is a CTE then 3 would be normal subqueries or relation references as necessary. David J. -- View this message in context: http://postgresql.1045698.n5.nabble.com/Problem-with-duplicate-rows-when-FULL-OUTER-JOIN-ing-3-derived-tables-tp5806970p5807003.html Sent from the PostgreSQL - sql mailing list archive at Nabble.com.
Re: Re: Problem with duplicate rows when FULL OUTER JOIN'ing 3 derived tables
From
Andreas Joseph Krogh
Date:
På torsdag 12. juni 2014 kl. 16:02:39, skrev David G Johnston <david.g.johnston@gmail.com>:
Note - numbers do not relate to your questions. This pretty much answers 1
and 3.
1. I'd ensure that project name/id on a table can never be null by creating
a dummy project that means "none assigned".
2. I would avoid full join. To do so I'd cross join a distinct list of
companies with a distinct list of projects.
3. Against the join in 2 you then left join three times, once for each
product table.
4. In the final result any product not having a correspond company/project
would have its value coalesced to zero.
The end result is a table without any NULL and, in the case of your
explanatory data, 9 rows - two of which would be all zeros: (c3,p1) and
(c3,p,2)
If you cannot change the raw data I'd use CTE/WITH to normalize the data
according to 1 and the use these CTEs in the rest of the query. I would
also do 2 is a CTE then 3 would be normal subqueries or relation references
as necessary.
David J.
Hi and thanks for input.
I cannot change the data so I have to deal with NULLs.
Not all companies or projects are in the result, only the ones involved having fruits, by combining the derived tables. I therefore don't see how I effectively can build up a list of distinct companies and projects to join with.
I have a simplified version (full schema below) here which shows that there's an error in the query (when one extra derived table is added to the query):
SELECT
coalesce(apl.company_name, bns.company_name, pns.company_name, prs.company_name) AS company_name,
coalesce(apl.project_name, bns.project_name, pns.project_name, prs.project_name) AS project_name,
apl.qty AS num_apples,
bns.qty AS num_bananas,
pns.qty AS num_pineapples,
prs.qty AS num_pears
coalesce(apl.company_name, bns.company_name, pns.company_name, prs.company_name) AS company_name,
coalesce(apl.project_name, bns.project_name, pns.project_name, prs.project_name) AS project_name,
apl.qty AS num_apples,
bns.qty AS num_bananas,
pns.qty AS num_pineapples,
prs.qty AS num_pears
FROM
company_apples AS apl
FULL OUTER JOIN company_bananas AS bns
ON apl.company_id = bns.company_id
AND (apl.project_id IS NULL AND bns.project_id IS NULL OR apl.project_id = bns.project_id)
ON apl.company_id = bns.company_id
AND (apl.project_id IS NULL AND bns.project_id IS NULL OR apl.project_id = bns.project_id)
FULL OUTER JOIN company_pineapples AS pns
ON apl.company_id = pns.company_id
AND (apl.project_id IS NULL AND pns.project_id IS NULL OR apl.project_id = pns.project_id)
ON apl.company_id = pns.company_id
AND (apl.project_id IS NULL AND pns.project_id IS NULL OR apl.project_id = pns.project_id)
FULL OUTER JOIN company_pears AS prs
ON apl.company_id = prs.company_id
AND (apl.project_id IS NULL AND prs.project_id IS NULL OR apl.project_id = prs.project_id)
ON apl.company_id = prs.company_id
AND (apl.project_id IS NULL AND prs.project_id IS NULL OR apl.project_id = prs.project_id)
ORDER BY company_name ASC, project_name ASC NULLS FIRST;
This gives the result:
# | company_name | project_name | num_apples | num_bananas | num_pineapples | num_pears |
1 | C1 | NULL | 2 | 2 | 10 | NULL |
2 | C1 | P1 | 5 | 12 | NULL | NULL |
3 | C1 | P2 | 2 | NULL | NULL | NULL |
4 | C2 | NULL | 3 | NULL | 10 | NULL |
5 | C2 | P1 | 3 | NULL | NULL | NULL |
6 | C2 | P2 | 3 | NULL | NULL | NULL |
7 | C3 | NULL | NULL | 8 | NULL | NULL |
8 | C3 | NULL | NULL | NULL | NULL | 7 |
As you see, there are two rows for C3, which should have been 1, with num_bananas=8 and num_pears=7. There has to be something wrong with my FULL OUTER JOINs but I don't know what....
Anyone knows how to write the correct query for this?
Thanks.
Full schame and example-data:
drop table if exists company_apples;
drop table if exists company_bananas;
drop table if exists company_pineapples;
drop table if exists company_bananas;
drop table if exists company_pineapples;
CREATE TABLE company_apples
(
company_id INT,
company_name VARCHAR,
project_id INT,
project_name VARCHAR,
qty BIGINT
);
(
company_id INT,
company_name VARCHAR,
project_id INT,
project_name VARCHAR,
qty BIGINT
);
CREATE TABLE company_bananas
(
company_id INT,
company_name VARCHAR,
project_id INT,
project_name VARCHAR,
qty BIGINT
);
(
company_id INT,
company_name VARCHAR,
project_id INT,
project_name VARCHAR,
qty BIGINT
);
CREATE TABLE company_pineapples
(
company_id INT,
company_name VARCHAR,
project_id INT,
project_name VARCHAR,
qty BIGINT
);
(
company_id INT,
company_name VARCHAR,
project_id INT,
project_name VARCHAR,
qty BIGINT
);
CREATE TABLE company_pears
(
company_id INT,
company_name VARCHAR,
project_id INT,
project_name VARCHAR,
qty BIGINT
);
(
company_id INT,
company_name VARCHAR,
project_id INT,
project_name VARCHAR,
qty BIGINT
);
INSERT INTO company_apples (company_id, company_name, project_id, project_name, qty) VALUES (1, 'C1', null, null, 2);
INSERT INTO company_apples (company_id, company_name, project_id, project_name, qty) VALUES (1, 'C1', 1, 'P1', 5);
INSERT INTO company_apples (company_id, company_name, project_id, project_name, qty) VALUES (1, 'C1', 2, 'P2', 2);
INSERT INTO company_apples (company_id, company_name, project_id, project_name, qty) VALUES (2, 'C2', null, null, 3);
INSERT INTO company_apples (company_id, company_name, project_id, project_name, qty) VALUES (2, 'C2', 1, 'P1', 3);
INSERT INTO company_apples (company_id, company_name, project_id, project_name, qty) VALUES (2, 'C2', 2, 'P2', 3);
INSERT INTO company_bananas (company_id, company_name, project_id, project_name, qty) VALUES (1, 'C1', null, null, 2);
INSERT INTO company_bananas (company_id, company_name, project_id, project_name, qty) VALUES (1, 'C1', 1, 'P1', 12);
INSERT INTO company_bananas (company_id, company_name, project_id, project_name, qty) VALUES (3, 'C3', null, null, 8);
INSERT INTO company_bananas (company_id, company_name, project_id, project_name, qty) VALUES (1, 'C1', 1, 'P1', 12);
INSERT INTO company_bananas (company_id, company_name, project_id, project_name, qty) VALUES (3, 'C3', null, null, 8);
INSERT INTO company_pineapples (company_id, company_name, project_id, project_name, qty) VALUES (1, 'C1', null, null, 10);
INSERT INTO company_pineapples (company_id, company_name, project_id, project_name, qty) VALUES (2, 'C2', null, null, 10);
INSERT INTO company_pineapples (company_id, company_name, project_id, project_name, qty) VALUES (2, 'C2', null, null, 10);
INSERT INTO company_pears (company_id, company_name, project_id, project_name, qty) VALUES (3, 'C3', null, null, 7);
select company_name, project_name, qty from company_apples order by company_name ASC, project_name ASC nulls first;
select company_name, project_name, qty from company_bananas order by company_name ASC, project_name ASC nulls first;
select company_name, project_name, qty from company_pineapples order by company_name ASC, project_name ASC nulls first;
select company_name, project_name, qty from company_pears order by company_name ASC, project_name ASC nulls first;
select company_name, project_name, qty from company_bananas order by company_name ASC, project_name ASC nulls first;
select company_name, project_name, qty from company_pineapples order by company_name ASC, project_name ASC nulls first;
select company_name, project_name, qty from company_pears order by company_name ASC, project_name ASC nulls first;
--
Andreas Jospeh Krogh
CTO / Partner - Visena AS
Mobile: +47 909 56 963
Attachment
Re: Problem with duplicate rows when FULL OUTER JOIN'ing 3 derived tables
From
David G Johnston
Date:
SELECT
coalesce(apl.company_name, bns.company_name, pns.company_name, prs.company_name) AS company_name,
coalesce(apl.project_name, bns.project_name, pns.project_name, prs.project_name) AS project_name,
apl.qty AS num_apples,
bns.qty AS num_bananas,
pns.qty AS num_pineapples,
prs.qty AS num_pearsFROMcompany_apples AS aplFULL OUTER JOIN company_bananas AS bns
ON apl.company_id = bns.company_id
AND (apl.project_id IS NULL AND bns.project_id IS NULL OR apl.project_id = bns.project_id)FULL OUTER JOIN company_pineapples AS pns
ON apl.company_id = pns.company_id
AND (apl.project_id IS NULL AND pns.project_id IS NULL OR apl.project_id = pns.project_id)FULL OUTER JOIN company_pears AS prs
ON apl.company_id = prs.company_id
AND (apl.project_id IS NULL AND prs.project_id IS NULL OR apl.project_id = prs.project_id)ORDER BY company_name ASC, project_name ASC NULLS FIRST;This gives the result:
# company_name project_name num_apples num_bananas num_pineapples num_pears 1 C1 NULL 2 2 10 NULL 2 C1 P1 5 12 NULL NULL 3 C1 P2 2 NULL NULL NULL 4 C2 NULL 3 NULL 10 NULL 5 C2 P1 3 NULL NULL NULL 6 C2 P2 3 NULL NULL NULL 7 C3 NULL NULL 8 NULL NULL 8 C3 NULL NULL NULL NULL 7 As you see, there are two rows for C3, which should have been 1, with num_bananas=8 and num_pears=7. There has to be something wrong with my FULL OUTER JOINs but I don't know what....
Row #7 is from the right side of the outer join between apples and bananas
Row #8 is from the right side of the outer join between apples and pears
Since you are only using apple as the source of valid company-project pairs only those records will be "correct" per your definition.
Even if you do not use a CROSS JOIN between company/product you have to obtain a master list of valid company-project pairs from ALL of the target tables. You can then LEFT JOIN that master against each of the target tables and be sure that you have a valid master record to attach to.
The direct way to do this is:
SELECT DISTINCT company_id, project_id FROM apples
UNION DISTINCT
SELECT DISTINCT company_id, project_id FROM pears
[and so forth]
David J.
View this message in context: Re: Problem with duplicate rows when FULL OUTER JOIN'ing 3 derived tables
Sent from the PostgreSQL - sql mailing list archive at Nabble.com.
Re: Problem with duplicate rows when FULL OUTER JOIN'ing 3 derived tables
From
David G Johnston
Date:
SELECT
coalesce(apl.company_name, bns.company_name, pns.company_name, prs.company_name) AS company_name,
coalesce(apl.project_name, bns.project_name, pns.project_name, prs.project_name) AS project_name,
apl.qty AS num_apples,
bns.qty AS num_bananas,
pns.qty AS num_pineapples,
prs.qty AS num_pearsFROMcompany_apples AS aplFULL OUTER JOIN company_bananas AS bns
ON apl.company_id = bns.company_id
AND (apl.project_id IS NULL AND bns.project_id IS NULL OR apl.project_id = bns.project_id)FULL OUTER JOIN company_pineapples AS pns
ON apl.company_id = pns.company_id
AND (apl.project_id IS NULL AND pns.project_id IS NULL OR apl.project_id = pns.project_id)FULL OUTER JOIN company_pears AS prs
ON apl.company_id = prs.company_id
AND (apl.project_id IS NULL AND prs.project_id IS NULL OR apl.project_id = prs.project_id)ORDER BY company_name ASC, project_name ASC NULLS FIRST;This gives the result:
# company_name project_name num_apples num_bananas num_pineapples num_pears 1 C1 NULL 2 2 10 NULL 2 C1 P1 5 12 NULL NULL 3 C1 P2 2 NULL NULL NULL 4 C2 NULL 3 NULL 10 NULL 5 C2 P1 3 NULL NULL NULL 6 C2 P2 3 NULL NULL NULL 7 C3 NULL NULL 8 NULL NULL 8 C3 NULL NULL NULL NULL 7 As you see, there are two rows for C3, which should have been 1, with num_bananas=8 and num_pears=7. There has to be something wrong with my FULL OUTER JOINs but I don't know what....Row #7 is from the right side of the outer join between apples and bananasRow #8 is from the right side of the outer join between apples and pearsSince you are only using apple as the source of valid company-project pairs only those records will be "correct" per your definition.Even if you do not use a CROSS JOIN between company/product you have to obtain a master list of valid company-project pairs from ALL of the target tables. You can then LEFT JOIN that master against each of the target tables and be sure that you have a valid master record to attach to.The direct way to do this is:SELECT DISTINCT company_id, project_id FROM applesUNION DISTINCTSELECT DISTINCT company_id, project_id FROM pears[and so forth]David J.
Though I guess you could also simply chain together the FULL OUTER join:
FROM ( ( (apple OUTER bananas) AS ab OUTER pears ) AS abp OUTER pineapples ) AS abpp
David J.
View this message in context: Re: Problem with duplicate rows when FULL OUTER JOIN'ing 3 derived tables
Sent from the PostgreSQL - sql mailing list archive at Nabble.com.
Re: Re: Problem with duplicate rows when FULL OUTER JOIN'ing 3 derived tables
From
Andreas Joseph Krogh
Date:
På torsdag 12. juni 2014 kl. 21:49:59, skrev David G Johnston <david.g.johnston@gmail.com>:
SELECT
coalesce(apl.company_name, bns.company_name, pns.company_name, prs.company_name) AS company_name,
coalesce(apl.project_name, bns.project_name, pns.project_name, prs.project_name) AS project_name,
apl.qty AS num_apples,
bns.qty AS num_bananas,
pns.qty AS num_pineapples,
prs.qty AS num_pearsFROMcompany_apples AS aplFULL OUTER JOIN company_bananas AS bns
ON apl.company_id = bns.company_id
AND (apl.project_id IS NULL AND bns.project_id IS NULL OR apl.project_id = bns.project_id)FULL OUTER JOIN company_pineapples AS pns
ON apl.company_id = pns.company_id
AND (apl.project_id IS NULL AND pns.project_id IS NULL OR apl.project_id = pns.project_id)FULL OUTER JOIN company_pears AS prs
ON apl.company_id = prs.company_id
AND (apl.project_id IS NULL AND prs.project_id IS NULL OR apl.project_id = prs.project_id)ORDER BY company_name ASC, project_name ASC NULLS FIRST;This gives the result:
# company_name project_name num_apples num_bananas num_pineapples num_pears 1 C1 NULL 2 2 10 NULL 2 C1 P1 5 12 NULL NULL 3 C1 P2 2 NULL NULL NULL 4 C2 NULL 3 NULL 10 NULL 5 C2 P1 3 NULL NULL NULL 6 C2 P2 3 NULL NULL NULL 7 C3 NULL NULL 8 NULL NULL 8 C3 NULL NULL NULL NULL 7 As you see, there are two rows for C3, which should have been 1, with num_bananas=8 and num_pears=7. There has to be something wrong with my FULL OUTER JOINs but I don't know what....Row #7 is from the right side of the outer join between apples and bananasRow #8 is from the right side of the outer join between apples and pearsSince you are only using apple as the source of valid company-project pairs only those records will be "correct" per your definition.Even if you do not use a CROSS JOIN between company/product you have to obtain a master list of valid company-project pairs from ALL of the target tables. You can then LEFT JOIN that master against each of the target tables and be sure that you have a valid master record to attach to.The direct way to do this is:SELECT DISTINCT company_id, project_id FROM applesUNION DISTINCTSELECT DISTINCT company_id, project_id FROM pears[and so forth]
The problem with retreiving a list of companies/projects is that the actual sub-queries are pretty complex and don't run instantly, so I'll end up querying lots of things twice.
--
Andreas Jospeh Krogh
CTO / Partner - Visena AS
Mobile: +47 909 56 963
Attachment
Re: Re: Problem with duplicate rows when FULL OUTER JOIN'ing 3 derived tables
From
Andreas Joseph Krogh
Date:
På torsdag 12. juni 2014 kl. 21:52:22, skrev David G Johnston <david.g.johnston@gmail.com>:
SELECT
coalesce(apl.company_name, bns.company_name, pns.company_name, prs.company_name) AS company_name,
coalesce(apl.project_name, bns.project_name, pns.project_name, prs.project_name) AS project_name,
apl.qty AS num_apples,
bns.qty AS num_bananas,
pns.qty AS num_pineapples,
prs.qty AS num_pearsFROMcompany_apples AS aplFULL OUTER JOIN company_bananas AS bns
ON apl.company_id = bns.company_id
AND (apl.project_id IS NULL AND bns.project_id IS NULL OR apl.project_id = bns.project_id)FULL OUTER JOIN company_pineapples AS pns
ON apl.company_id = pns.company_id
AND (apl.project_id IS NULL AND pns.project_id IS NULL OR apl.project_id = pns.project_id)FULL OUTER JOIN company_pears AS prs
ON apl.company_id = prs.company_id
AND (apl.project_id IS NULL AND prs.project_id IS NULL OR apl.project_id = prs.project_id)ORDER BY company_name ASC, project_name ASC NULLS FIRST;This gives the result:
# company_name project_name num_apples num_bananas num_pineapples num_pears 1 C1 NULL 2 2 10 NULL 2 C1 P1 5 12 NULL NULL 3 C1 P2 2 NULL NULL NULL 4 C2 NULL 3 NULL 10 NULL 5 C2 P1 3 NULL NULL NULL 6 C2 P2 3 NULL NULL NULL 7 C3 NULL NULL 8 NULL NULL 8 C3 NULL NULL NULL NULL 7 As you see, there are two rows for C3, which should have been 1, with num_bananas=8 and num_pears=7. There has to be something wrong with my FULL OUTER JOINs but I don't know what....Row #7 is from the right side of the outer join between apples and bananasRow #8 is from the right side of the outer join between apples and pearsSince you are only using apple as the source of valid company-project pairs only those records will be "correct" per your definition.Even if you do not use a CROSS JOIN between company/product you have to obtain a master list of valid company-project pairs from ALL of the target tables. You can then LEFT JOIN that master against each of the target tables and be sure that you have a valid master record to attach to.The direct way to do this is:SELECT DISTINCT company_id, project_id FROM applesUNION DISTINCTSELECT DISTINCT company_id, project_id FROM pears[and so forth]David J.Though I guess you could also simply chain together the FULL OUTER join:FROM ( ( (apple OUTER bananas) AS ab OUTER pears ) AS abp OUTER pineapples ) AS abppDavid J.
By OUTER, do you mean FULL OUTER JOIN here?
I'm unsure how to write the correct ON-clause of my FULL OUTER JOINs. Do you know how?
There will be more derived tables to FULL OUTER JOIN with so I need something robust, just don't know how to do it.
Thanks.
--
Andreas Jospeh Krogh
CTO / Partner - Visena AS
Mobile: +47 909 56 963
Attachment
Re: Problem with duplicate rows when FULL OUTER JOIN'ing 3 derived tables
From
David G Johnston
Date:
På torsdag 12. juni 2014 kl. 21:52:22, skrev David G Johnston <[hidden email]>:SELECT
coalesce(apl.company_name, bns.company_name, pns.company_name, prs.company_name) AS company_name,
coalesce(apl.project_name, bns.project_name, pns.project_name, prs.project_name) AS project_name,
apl.qty AS num_apples,
bns.qty AS num_bananas,
pns.qty AS num_pineapples,
prs.qty AS num_pearsFROMcompany_apples AS aplFULL OUTER JOIN company_bananas AS bns
ON apl.company_id = bns.company_id
AND (apl.project_id IS NULL AND bns.project_id IS NULL OR apl.project_id = bns.project_id)FULL OUTER JOIN company_pineapples AS pns
ON apl.company_id = pns.company_id
AND (apl.project_id IS NULL AND pns.project_id IS NULL OR apl.project_id = pns.project_id)FULL OUTER JOIN company_pears AS prs
ON apl.company_id = prs.company_id
AND (apl.project_id IS NULL AND prs.project_id IS NULL OR apl.project_id = prs.project_id)ORDER BY company_name ASC, project_name ASC NULLS FIRST;This gives the result:
# company_name project_name num_apples num_bananas num_pineapples num_pears 1 C1 NULL 2 2 10 NULL 2 C1 P1 5 12 NULL NULL 3 C1 P2 2 NULL NULL NULL 4 C2 NULL 3 NULL 10 NULL 5 C2 P1 3 NULL NULL NULL 6 C2 P2 3 NULL NULL NULL 7 C3 NULL NULL 8 NULL NULL 8 C3 NULL NULL NULL NULL 7 As you see, there are two rows for C3, which should have been 1, with num_bananas=8 and num_pears=7. There has to be something wrong with my FULL OUTER JOINs but I don't know what....Row #7 is from the right side of the outer join between apples and bananasRow #8 is from the right side of the outer join between apples and pearsSince you are only using apple as the source of valid company-project pairs only those records will be "correct" per your definition.Even if you do not use a CROSS JOIN between company/product you have to obtain a master list of valid company-project pairs from ALL of the target tables. You can then LEFT JOIN that master against each of the target tables and be sure that you have a valid master record to attach to.The direct way to do this is:SELECT DISTINCT company_id, project_id FROM applesUNION DISTINCTSELECT DISTINCT company_id, project_id FROM pears[and so forth]David J.Though I guess you could also simply chain together the FULL OUTER join:FROM ( ( (apple OUTER bananas) AS ab OUTER pears ) AS abp OUTER pineapples ) AS abppDavid J.By OUTER, do you mean FULL OUTER JOIN here?
Yes
I'm unsure how to write the correct ON-clause of my FULL OUTER JOINs. Do you know how?
You ON-clause was just fine...
There will be more derived tables to FULL OUTER JOIN with so I need something robust, just don't know how to do it.
WITH
a_src (companyid, projectid, a_count) AS ( SELECT companyid, COALESCE(projectid,'N/A'), a_count FROM company_a)
, b_src (companyid, projectid, b_count) AS ( SELECT companyid, COALESCE(projectid,'N/A'), b_count FROM company_b)
, left_master AS ( SELECT DISTINCT companyid, projectid FROM a_src UNION DISTINCT SELECT DISTINCT companyid, projectid FROM b_src)
SELECT companyid, projectid, a_count, b_count
FROM left_master
LEFT JOIN a_src USING (companyid, projectid)
LEFT JOIN b_src USING (companyid, projectid)
;
If it is too slow to derive left_master you can consider adding triggers to the company_product tables to maintain a separate table of known combinations.
The chaining version:
WITH
a_src (companyid, projectid, a_count) AS ( SELECT companyid, COALESCE(projectid,'N/A'), a_count FROM company_a)
, b_src (companyid, projectid, b_count) AS ( SELECT companyid, COALESCE(projectid,'N/A'), b_count FROM company_b)
, c_src (companyid, projectid, c_count) AS ( SELECT companyid, COALESCE(projectid,'N/A'), c_count FROM company_c)
SELECT companyid, projectid, a_count, b_count, c_count
FROM (a_src FULL JOIN b_src USING (companyid, projectid)) ab_src FULL JOIN c_src USING (companyid, projectid)) abc_src
;
Though you could also test whether the following is faster:
a_raw FULL JOIN b_raw ON ((a_raw.companyid, COALESCE(a_raw.projectid, 'N/A')) = (b_raw.companyid, COALESCE(b_raw.projectid, 'N/A')))
Alternatively...go vertical:
WITH
a_src (companyid, projectid, item_count, item_type) AS ( SELECT companyid, COALESCE(projectid,'N/A'), a_count, 'A' FROM company_a)
, b_src (companyid, projectid, item_count, item_type) AS ( SELECT companyid, COALESCE(projectid,'N/A'), b_count, 'B' FROM company_b)
SELECT *
FROM a_src
UNION ALL
SELECT *
FROM b_src
;
David J.
View this message in context: Re: Problem with duplicate rows when FULL OUTER JOIN'ing 3 derived tables
Sent from the PostgreSQL - sql mailing list archive at Nabble.com.
Re: Re: Problem with duplicate rows when FULL OUTER JOIN'ing 3 derived tables
From
Andreas Joseph Krogh
Date:
På torsdag 12. juni 2014 kl. 22:36:23, skrev David G Johnston <david.g.johnston@gmail.com>:
[snip]WITHa_src (companyid, projectid, a_count) AS ( SELECT companyid, COALESCE(projectid,'N/A'), a_count FROM company_a), b_src (companyid, projectid, b_count) AS ( SELECT companyid, COALESCE(projectid,'N/A'), b_count FROM company_b), left_master AS ( SELECT DISTINCT companyid, projectid FROM a_src UNION DISTINCT SELECT DISTINCT companyid, projectid FROM b_src)SELECT companyid, projectid, a_count, b_countFROM left_masterLEFT JOIN a_src USING (companyid, projectid)LEFT JOIN b_src USING (companyid, projectid);If it is too slow to derive left_master you can consider adding triggers to the company_product tables to maintain a separate table of known combinations.The chaining version:WITHa_src (companyid, projectid, a_count) AS ( SELECT companyid, COALESCE(projectid,'N/A'), a_count FROM company_a), b_src (companyid, projectid, b_count) AS ( SELECT companyid, COALESCE(projectid,'N/A'), b_count FROM company_b), c_src (companyid, projectid, c_count) AS ( SELECT companyid, COALESCE(projectid,'N/A'), c_count FROM company_c)SELECT companyid, projectid, a_count, b_count, c_countFROM (a_src FULL JOIN b_src USING (companyid, projectid)) ab_src FULL JOIN c_src USING (companyid, projectid)) abc_src;Though you could also test whether the following is faster:a_raw FULL JOIN b_raw ON ((a_raw.companyid, COALESCE(a_raw.projectid, 'N/A')) = (b_raw.companyid, COALESCE(b_raw.projectid, 'N/A')))Alternatively...go vertical:WITHa_src (companyid, projectid, item_count, item_type) AS ( SELECT companyid, COALESCE(projectid,'N/A'), a_count, 'A' FROM company_a), b_src (companyid, projectid, item_count, item_type) AS ( SELECT companyid, COALESCE(projectid,'N/A'), b_count, 'B' FROM company_b)SELECT *FROM a_srcUNION ALLSELECT *FROM b_src;David J.
Your chaining version with WITH was the only one I could get to work.
I think that is the cleanest version as it wraps the rather large query behind the derived tables in a readable fashion.
Thanks!
--
Andreas Jospeh Krogh
CTO / Partner - Visena AS
Mobile: +47 909 56 963