Thread: olympics ranking query

olympics ranking query

From

David Garamond

Date:

20 August 2004, 13:37:55

See http://www.athens2004.com/en/OlympicMedals/medals?noc=MGL .

create table countrymedal (  countryid CHAR(3) PRIMARY KEY,  gold INT NOT NULL,  silver INT NOT NULL,  bronze INT NOT
NULL);

COPY countrymedal (countryid, gold, silver, bronze) FROM stdin;
ITA    5    6    3
FRA    5    3    5
UKR    5    1    1
RUS    4    8    10
GER    4    4    7
TUR    3    0    1
KOR    2    7    3
NED    2    5    4
HUN    2    3    1
SVK    2    2    1
ROM    2    0    2
GRE    2    0    1
POL    1    2    1
BLR    1    1    2
SUI    1    0    1
UAE    1    0    0
GBR    0    4    4
AUT    0    3    0
PRK    0    2    1
ESP    0    2    0
CUB    0    1    5
CZE    0    1    2
ZIM    0    1    1
USA    15    11    10
CHN    15    9    8
JPN    9    4    2
AUS    7    5    8
GEO    1    1    0
RSA    1    1    0
BUL    1    0    2
THA    1    0    2
IND    0    1    0
INA    0    1    0
KAZ    0    1    0
POR    0    1    0
SCG    0    1    0
AZE    0    0    2
BEL    0    0    2
BRA    0    0    2
DEN    0    0    2
ARG    0    0    1
CAN    0    0    1
COL    0    0    1
CRO    0    0    1
ISR    0    0    1
MGL    0    0    1
SLO    0    0    1
TRI    0    0    1
\.

create sequence seq1;
create sequence seq2;

-- query #1: list of ranks by gold
select setval('seq1', 1);
select setval('seq2', 1);
select  setval('seq1', currval('seq1')+setval('seq2',count(*)))-count(*) as rank,  count(*) as numranker,  gold,
silver,bronze
 
from countrymedal
group by gold, silver, bronze
order by gold desc, silver desc, bronze desc;

-- result of query #1 rank | numranker | gold | silver | bronze
------+-----------+------+--------+--------    1 |         1 |   15 |     11 |     10    2 |         1 |   15 |      9
|     8    3 |         1 |    9 |      4 |      2    4 |         1 |    7 |      5 |      8    5 |         1 |    5 |
  6 |      3    6 |         1 |    5 |      3 |      5    7 |         1 |    5 |      1 |      1    8 |         1 |
4|      8 |     10    9 |         1 |    4 |      4 |      7   10 |         1 |    3 |      0 |      1   11 |         1
|   2 |      7 |      3   12 |         1 |    2 |      5 |      4   13 |         1 |    2 |      3 |      1   14 |
  1 |    2 |      2 |      1   15 |         1 |    2 |      0 |      2   16 |         1 |    2 |      0 |      1   17 |
       1 |    1 |      2 |      1   18 |         1 |    1 |      1 |      2   19 |         2 |    1 |      1 |      0
21|         2 |    1 |      0 |      2   23 |         1 |    1 |      0 |      1   24 |         1 |    1 |      0 |
0   25 |         1 |    0 |      4 |      4   26 |         1 |    0 |      3 |      0   27 |         1 |    0 |      2
|     1   28 |         1 |    0 |      2 |      0   29 |         1 |    0 |      1 |      5   30 |         1 |    0 |
  1 |      2   31 |         1 |    0 |      1 |      1   32 |         5 |    0 |      1 |      0   37 |         4 |
0|      0 |      2   41 |         8 |    0 |      0 |      1
 
(32 rows)

-- query #2: list of countries ordered by their ranks
select setval('seq1', 1);
select setval('seq2', 1);
select  (case numranker when 1 then '' else '=' end) || rank as rank,  countryid,  cm.gold, cm.silver, cm.bronze
from countrymedal cm
left join  (select  setval('seq1',         currval('seq1')+setval('seq2',count(*))        )-count(*) as rank,  count(*)
asnumranker,  gold, silver, bronze  from countrymedal  group by gold, silver, bronze  order by gold desc, silver desc,
bronzedesc  ) t1 on cm.gold=t1.gold and cm.silver=t1.silver and cm.bronze=t1.bronze
 
order by t1.rank;

-- result of query #2 rank | countryid | gold | silver | bronze
------+-----------+------+--------+-------- 1    | USA       |   15 |     11 |     10 2    | CHN       |   15 |      9
|     8 3    | JPN       |    9 |      4 |      2 4    | AUS       |    7 |      5 |      8 5    | ITA       |    5 |
  6 |      3 6    | FRA       |    5 |      3 |      5 7    | UKR       |    5 |      1 |      1 8    | RUS       |
4|      8 |     10 9    | GER       |    4 |      4 |      7 10   | TUR       |    3 |      0 |      1 11   | KOR
|   2 |      7 |      3 12   | NED       |    2 |      5 |      4 13   | HUN       |    2 |      3 |      1 14   | SVK
    |    2 |      2 |      1 15   | ROM       |    2 |      0 |      2 16   | GRE       |    2 |      0 |      1 17   |
POL      |    1 |      2 |      1 18   | BLR       |    1 |      1 |      2 =19  | GEO       |    1 |      1 |      0
=19 | RSA       |    1 |      1 |      0 =21  | BUL       |    1 |      0 |      2 =21  | THA       |    1 |      0 |
  2 23   | SUI       |    1 |      0 |      1 24   | UAE       |    1 |      0 |      0 25   | GBR       |    0 |
4|      4 26   | AUT       |    0 |      3 |      0 27   | PRK       |    0 |      2 |      1 28   | ESP       |    0 |
    2 |      0 29   | CUB       |    0 |      1 |      5 30   | CZE       |    0 |      1 |      2 31   | ZIM       |
0 |      1 |      1 =32  | IND       |    0 |      1 |      0 =32  | INA       |    0 |      1 |      0 =32  | KAZ
|    0 |      1 |      0 =32  | POR       |    0 |      1 |      0 =32  | SCG       |    0 |      1 |      0 =37  | AZE
     |    0 |      0 |      2 =37  | BEL       |    0 |      0 |      2 =37  | BRA       |    0 |      0 |      2 =37
|DEN       |    0 |      0 |      2 =41  | ARG       |    0 |      0 |      1 =41  | CAN       |    0 |      0 |      1
=41 | COL       |    0 |      0 |      1 =41  | CRO       |    0 |      0 |      1 =41  | ISR       |    0 |      0 |
  1 =41  | MGL       |    0 |      0 |      1 =41  | SLO       |    0 |      0 |      1 =41  | TRI       |    0 |
0|      1
 


Challenge question: is there a simpler way to do query #1 (without any 
PL, and if possible without sequences too?

-- 
dave

Re: olympics ranking query

From

Tom Lane

Date:

20 August 2004, 13:46:25

David Garamond <lists@zara.6.isreserved.com> writes:
> Challenge question: is there a simpler way to do query #1 (without any 
> PL, and if possible without sequences too?

Can't without sequences AFAIK, but you certainly can do it simpler:

select setval('seq1', 0);

select nextval('seq1'), * from
(select count(*) as numranker,gold, silver, bronzefrom countrymedalgroup by gold, silver, bronzeorder by gold desc,
silverdesc, bronze desc) ss;
 

        regards, tom lane

Re: olympics ranking query

From

Bruno Wolff III

Date:

20 August 2004, 13:53:17

On Fri, Aug 20, 2004 at 23:40:08 +0700, David Garamond <lists@zara.6.isreserved.com> wrote:
> 
> Challenge question: is there a simpler way to do query #1 (without any 
> PL, and if possible without sequences too?

You could use a subselect to count how many countries had a lower
medal ranking and add 1 to get the rank. This should be a lot more standard
than using sequences. It will probably be a little slower, but for tables
of that size it shouldn't be a big deal.

Re: olympics ranking query

From

David Garamond

Date:

20 August 2004, 14:09:30

Tom Lane wrote:
>>Challenge question: is there a simpler way to do query #1 (without any 
>>PL, and if possible without sequences too?
> 
> Can't without sequences AFAIK, but you certainly can do it simpler:
> 
> select setval('seq1', 0);
> 
> select nextval('seq1'), * from
> (select count(*) as numranker,
>  gold, silver, bronze
>  from countrymedal
>  group by gold, silver, bronze
>  order by gold desc, silver desc, bronze desc) ss;

This is not quite the same. The ranks are sequential, but they skip, so 
as to match the number of participating countries.

-- 
dave

Re: olympics ranking query

From

Tom Lane

Date:

20 August 2004, 14:21:59

David Garamond <lists@zara.6.isreserved.com> writes:
> This is not quite the same. The ranks are sequential, but they skip, so 
> as to match the number of participating countries.

Oh, I missed that bit.

What you really want here is a "running sum" function, that is
SELECT running_sum(numranker) as rank, * FROM(same subselect as before) ss;

There is no such thing in standard SQL, because it's fundamentally
dependent on the assumption of the input data coming in a particular
order, which is Evil Incarnate according to the relational worldview.
But it's not too hard to do in PLs that allow persistent state.
I recall Elein having exhibited one in plpython(?) not too long ago
--- you might find it on techdocs or varlena.com.

You could brute-force it with a subselect (essentially "SUM(numranker)
over all rows that should precede this one") but that would involve
recomputing the whole subselect for each output row, which doesn't seem
very attractive.
        regards, tom lane

Re: olympics ranking query

From

David Garamond

Date:

20 August 2004, 14:25:34

Bruno Wolff III wrote:
> On Fri, Aug 20, 2004 at 23:40:08 +0700,
>   David Garamond <lists@zara.6.isreserved.com> wrote:
> 
>>Challenge question: is there a simpler way to do query #1 (without any 
>>PL, and if possible without sequences too?
> 
> You could use a subselect to count how many countries had a lower
> medal ranking and add 1 to get the rank. This should be a lot more standard
> than using sequences. It will probably be a little slower, but for tables
> of that size it shouldn't be a big deal.

Thanks for the tip. This is what I came up with:

select  (select count(*) from countrymedal c1 where   c1.gold>c2.gold or     (c1.gold=c2.gold and (c1.silver>c2.silver
or      (c1.silver=c2.silver and c1.bronze>c2.bronze))))+1 as rank,  count(*) as numranker,  gold, silver, bronze
 
from countrymedal c2
group by gold, silver, bronze
order by rank;

-- 
dave

Re: olympics ranking query

From

Mischa Sandberg

Date:

25 August 2004, 22:10:32

That 'running aggregate' notion comes up quite regularily,
and it has a number of nonintuitive holes, to do with
what happens when your ordering allows for ties.
ASTRID had it, RedBrick SQL had it, the latest MSSQL
has it ... not necessarily a recommendation.

Tom Lane wrote:
> David Garamond <lists@zara.6.isreserved.com> writes:
> 
>>This is not quite the same. The ranks are sequential, but they skip, so 
>>as to match the number of participating countries.
> 
> 
> Oh, I missed that bit.
> 
> What you really want here is a "running sum" function, that is
> 
>     SELECT running_sum(numranker) as rank, * FROM
>     (same subselect as before) ss;
> 
> There is no such thing in standard SQL, because it's fundamentally
> dependent on the assumption of the input data coming in a particular
> order, which is Evil Incarnate according to the relational worldview.
> But it's not too hard to do in PLs that allow persistent state.
> I recall Elein having exhibited one in plpython(?) not too long ago
> --- you might find it on techdocs or varlena.com.
> 
> You could brute-force it with a subselect (essentially "SUM(numranker)
> over all rows that should precede this one") but that would involve
> recomputing the whole subselect for each output row, which doesn't seem
> very attractive.
> 
>             regards, tom lane
> 
> ---------------------------(end of broadcast)---------------------------
> TIP 7: don't forget to increase your free space map settings
>

Re: olympics ranking query

From

Kyle Bateman

Date:

29 March 2007, 18:56:23

Several years ago someone posted a question about how to achieve a 
running total of columns in sql.  I have been trying to find a solution 
to a similar problem for some time and just came up with something that 
works great for me so I thought I'd post it for the benefit of the list.

The problem is to produce a query like the following:

select date,amount,run_sum(amount) from ledger order by date;

Date        Amount     Sum
----------- ---------- ---------
2007-Jan-01      10.00     10.00
2007-Jan-02      20.00     30.00
2007-Jan-05       5.00     35.00
2007-Jan-10      -3.00     32.00
.
.
.

Using pl/tcl, I made the following function:

#Keep a running sum on a column where tag and trans are used to keep the
#results distinct from other calls to the same function
#Usage: run_sum(value,tag,trans)
#-------------------------------------------
function run_sum(numeric,text,text) {} { returns numeric language pltcl called on null input as $$   if {![info exists
GD(sum.$2.$3)]}{       set GD(sum.$2.$3) 0.00   }   if {[argisnull 1]} {       return $GD(sum.$2.$3)   } else {
return[set GD(sum.$2.$3) [expr $GD(sum.$2.$3) + $1]]   }
 
$$;}

Then I added a wrapper function to automatically produce a separate 
namespace for each statement instance that uses the query:

#Keep a running sum on a column where tag is used to keep the results 
distinct
#Automatically keeps results distinct for each separate statement
#Usage: run_sum(value,tag)
#-------------------------------------------
function run_sum(numeric,text) {run_sum(numeric,text,text)} { returns numeric language sql as $$   select
run_sum($1,$2,statement_timestamp()::text);
$$;}

Now you can do:

select date,amount,run_sum(amount,'amount') from ledger;

to get an initial result.  The only problem is now ordering the data.  
If you just put an 'order by' clause on the end, you don't get what you 
might expect because the ordering happens after the function has 
produced its result.  So I do the following to order and sum it correctly:

select date,amount,run_sum(amount,'amount') from (select * from ledger 
order by date) ss;

The use of the "tag" parameter allows you to use this on multiple 
columns such as:

select date,debit,credit,   run_sum(debit,'debit')::numeric(14,2) as debits,   run_sum(credit,'credit')::numeric(14,2)
ascredits   from (select * from ledger order by date) ss;
 

Enjoy,

Kyle