Home > mailing lists

Re: Multi-parameter aggregates. - Mailing list pgsql-general

From	Berend Tober
Subject	Re: Multi-parameter aggregates.
Date	November 25, 2005 06:13:32
Msg-id	4386E3CE.6090604@seaworthysys.com Whole thread Raw
In response to	Re: Multi-parameter aggregates. (Michael Fuhr <mike@fuhr.org>)
List	pgsql-general

Tree view

Michael Fuhr wrote:

>On Mon, Nov 21, 2005 at 11:03:22AM -0500, Berend Tober wrote:
>
>
>>I'm interested in defining a covariance aggregate function. (As a
>>
>>
>...aggregates must take a single value, so the above won't
>work as written.  However, in PostgreSQL 8.0 or later you could
>define the aggregate's base type to be a composite type ...
>
>

Thanks for you help. Here is what I came up with, using the existing
POINT data type, which when you think about it makes a lot of sense, and
even thoug having to explicitly cast a pair of columns as a point type
is a little cludgy, I'm get the analytical tool I need, regardless.

CREATE TYPE public._regression AS (
    n integer,
    x double precision,
    y double precision,
    xy double precision
    );


CREATE OR REPLACE FUNCTION public.weighted_average(double precision,
double precision, double precision)
    RETURNS double precision AS '
    BEGIN
    IF $1>1.0 THEN
        RAISE EXCEPTION ''Weighted average % coefficient exceeds
unity.'', $1;
    ELSIF $1<0 THEN
        RAISE EXCEPTION ''Weighted average % coefficient less than
zero.'', $1;
    END IF;
    RETURN $1*$2 + (1.-$1)*$3;
    END;
    'LANGUAGE 'plpgsql' IMMUTABLE STRICT;


CREATE OR REPLACE FUNCTION public.regression_accum(_regression, point)
    RETURNS _regression AS '
    SELECT
        $1.n+1,
        weighted_average($1.n/($1.n+1.), $1.x, $2[0]),
        weighted_average($1.n/($1.n+1.), $1.y, $2[1]),
        weighted_average($1.n/($1.n+1.), $1.xy, $2[0]*$2[1]);
    'LANGUAGE 'sql' IMMUTABLE STRICT;
GRANT EXECUTE ON FUNCTION public.regression_accum(_regression, point) TO
public;
COMMENT ON FUNCTION public.regression_accum(_regression, point)
    IS 'regression aggregate transition function';

CREATE OR REPLACE FUNCTION public.covariance(_regression)
    RETURNS double precision AS '
    BEGIN
    IF $1.n = 0 THEN
        RAISE EXCEPTION ''No covariance data'';
    END IF;
    RETURN $1.xy - $1.x*$1.y;
    END;
    'LANGUAGE 'plpgsql' IMMUTABLE STRICT;
GRANT EXECUTE ON FUNCTION public.covariance(_regression) TO public;
COMMENT ON FUNCTION public.covariance(_regression)
    IS 'cov aggregate final function';

CREATE AGGREGATE public.cov(
  BASETYPE=point,
  SFUNC=regression_accum,
  STYPE=_regression,
  FINALFUNC=covariance,
  INITCOND='(0,0,0,0)'
);

-- Examples

CREATE LOCAL TEMPORARY TABLE test (
    example integer,
    x numeric,
    y numeric
) without oids;

-- Hand calculated: cov(x,y)=0.25
INSERT INTO test VALUES (1, 0.0, 0.0);
INSERT INTO test VALUES (1, 1.0, 1.0);

-- Hand calculated: cov(x,x)=0.389, cov(y,y)=1.556, cov(x,y)=0.778
INSERT INTO test VALUES (2, 0.0, 0.0);
INSERT INTO test VALUES (2, 1.0, 2.0);
INSERT INTO test VALUES (2, 1.5, 3.0);

-- Hand calculated: cov(x,x)=0.264, cov(y,y)=0.192, cov(x,y)=0.214
INSERT INTO test VALUES (3, 0.25, 0.125);
INSERT INTO test VALUES (3, 1.0, 1.0);
INSERT INTO test VALUES (3, 1.5, 1.1);


--
http://www.quantlet.com/mdstat/scripts/mva/htmlbook/mvahtmlnode22.html:
cov(x,y) = -80.02
INSERT INTO test VALUES (4, 230, 125);
INSERT INTO test VALUES (4, 181,  99);
INSERT INTO test VALUES (4, 165,  97);
INSERT INTO test VALUES (4, 150, 115);
INSERT INTO test VALUES (4,  97, 120);
INSERT INTO test VALUES (4, 192, 100);
INSERT INTO test VALUES (4, 181,  80);
INSERT INTO test VALUES (4, 189,  90);
INSERT INTO test VALUES (4, 172,  95);
INSERT INTO test VALUES (4, 170, 125);

/*
Note that the value COV(X,X) and COV(Y,Y) do not equal output from
the native Postgresql aggregates VARIANCE(x) and VARIANCE(y),
respectively. I cannot explain why they are different, but COV()
is consistent with direct calculation.
*/

SELECT
    'Example '||example AS title,
    COUNT(*) AS N,
    COV(POINT(x,x)) AS covXX,
    COV(POINT(y,y)) AS covYY,
    COV(POINT(x,y)) AS covXY,
    VARIANCE(x),
    VARIANCE(y)
    FROM test
    GROUP BY 1
    ORDER BY 1;

pgsql-general by date:

From: Nikolay Samokhvalov
Date: 25 November 2005, 06:01:17
Subject: Re: tool for DB design

From: Harald Armin Massa
Date: 25 November 2005, 06:15:08
Subject: Re: pg_dump --data-only: is dump ordered to keep foreign-key-relations loadable?

Re: Multi-parameter aggregates. - Mailing list pgsql-general

Previous

Next