Re: count(*) performance improvement ideas - Mailing list pgsql-hackers

From PFC
Subject Re: count(*) performance improvement ideas
Date
Msg-id op.t9qbpigrcigqcu@apollo13.peufeu.com
Whole thread Raw
In response to Re: count(*) performance improvement ideas  ("Stephen Denne" <Stephen.Denne@datamail.co.nz>)
Responses Re: count(*) performance improvement ideas  ("Stephen Denne" <Stephen.Denne@datamail.co.nz>)
List pgsql-hackers
>> The whole thing is a bit of an abuse of what the mechanism
>> was intended
>> for, and so I'm not sure we should rejigger GUC's behavior to make it
>> more pleasant, but on the other hand if we're not ready to provide a
>> better substitute ...
>
> In my experiments with materialized views, I identified these problems
> as "minor" difficulties. Resolving them would allow further abuse ;)

Let's try this quick & dirty implementation of a local count-delta cache
using a local in-memory hashtable (ie. {}).
Writing the results to stable storage in an ON COMMIT trigger is left as
an exercise to the reader ;)
Performance isn't that bad, calling the trigger takes about 50 us.
Oldskool implementation with a table is at the end, it's about 10x slower.

Example :

INSERT INTO victim1 (key) VALUES ('one'),('two'),('two');
INSERT 0 3
Temps : 1,320 ms
test=# SELECT * FROM get_count(); key | cnt
-----+----- two |   2 one |   1


CREATE OR REPLACE FUNCTION clear_count(  )  RETURNS VOID
AS $$    GD.clear()
$$ LANGUAGE plpythonu;

CREATE OR REPLACE FUNCTION update_count( key TEXT, delta INTEGER )  RETURNS INTEGER
AS $$    if key in GD:            GD[key] += delta    else:            GD[key] = delta    return GD[key]
$$ LANGUAGE plpythonu;

CREATE TYPE count_data AS ( key TEXT, cnt INTEGER );

CREATE OR REPLACE FUNCTION get_count( )
RETURNS SETOF count_data
AS $$    return GD.iteritems()
$$ LANGUAGE plpythonu;


CREATE TABLE victim( id SERIAL PRIMARY KEY, key TEXT NOT NULL );
INSERT INTO victim (key) SELECT (random() * 300)::INTEGER::TEXT FROM
generate_series( 1,100000 );

CREATE TABLE victim1( id SERIAL PRIMARY KEY, key TEXT NOT NULL );

\timing
INSERT INTO victim1 SELECT * FROM victim;
TRUNCATE TABLE victim1;

SELECT clear_count();
INSERT INTO victim1 SELECT * FROM victim RETURNING update_count( key, 1 );
SELECT * FROM get_count();
TRUNCATE TABLE victim1;

CREATE OR REPLACE FUNCTION counter_trigger_f()    RETURNS TRIGGER    LANGUAGE plpgsql    AS
$$
DECLARE
BEGIN    IF TG_OP = 'INSERT' THEN        PERFORM update_count( NEW.key, 1 );        RETURN NEW;    ELSEIF TG_OP =
'UPDATE'THEN        -- update topic        IF NEW.key != OLD.key THEN            PERFORM update_count( OLD.key, -1 ),
update_count(NEW.key, 1   
);        END IF;        RETURN NEW;    ELSE    -- DELETE        PERFORM update_count( OLD.key, -1 );        RETURN
OLD;   END IF; 
END;
$$;

CREATE TRIGGER count_trigger BEFORE INSERT OR UPDATE OR DELETE ON victim1
FOR EACH ROW EXECUTE PROCEDURE counter_trigger_f();

SELECT clear_count();
INSERT INTO victim1 SELECT * FROM victim;
SELECT * FROM get_count();

SELECT clear_count();
TRUNCATE TABLE victim1;
INSERT INTO victim1 (key) VALUES ('one'),('two'),('two');
SELECT * FROM get_count();
DELETE FROM victim1 WHERE key='two';
SELECT * FROM get_count();
UPDATE victim1 SET key='three' WHERE key='one';
SELECT * FROM get_count();
DELETE FROM victim1;
SELECT * FROM get_count();


CREATE TABLE counts( key TEXT PRIMARY KEY, total INTEGER NOT NULL DEFAULT
0 );

CREATE OR REPLACE FUNCTION table_counter_trigger_f()    RETURNS TRIGGER    LANGUAGE plpgsql    AS
$$
DECLARE
BEGIN    IF TG_OP = 'INSERT' THEN        UPDATE counts SET total=total+1 WHERE key=NEW.key;        IF NOT FOUND THEN
INSERTINTO counts (key,total) VALUES   
(NEW.key,1); END IF;        RETURN NEW;    ELSEIF TG_OP = 'UPDATE' THEN        -- update topic        IF NEW.key !=
OLD.keyTHEN            UPDATE counts SET total=total-1 WHERE key=OLD.key;            UPDATE counts SET total=total+1
WHEREkey=NEW.key;            IF NOT FOUND THEN INSERT INTO counts (key,total) VALUES   
(NEW.key,1); END IF;        END IF;        RETURN NEW;    ELSE    -- DELETE            UPDATE counts SET total=total-1
WHEREkey=OLD.key;        RETURN OLD;    END IF; 
END;
$$;

CREATE TABLE victim2( id SERIAL PRIMARY KEY, key TEXT NOT NULL );

CREATE TRIGGER table_count_trigger BEFORE INSERT OR UPDATE OR DELETE ON
victim2 FOR EACH ROW EXECUTE PROCEDURE table_counter_trigger_f();
SELECT * FROM counts;
TRUNCATE TABLE victim2;
INSERT INTO victim2 SELECT * FROM victim;




pgsql-hackers by date:

Previous
From: Tom Lane
Date:
Subject: Re: count(*) performance improvement ideas
Next
From: "Stephen Denne"
Date:
Subject: Re: count(*) performance improvement ideas