Slow sub-selects, max and count(*) - Mailing list pgsql-sql

From Richard Sydney-Smith
Subject Slow sub-selects, max and count(*)
Date
Msg-id 001001c3eb8a$255d6dc0$5801a8c0@athlon2000
Whole thread Raw
Responses Re: Slow sub-selects, max and count(*)
List pgsql-sql
I have a procedure ( included below with table definition) to import end of day quotes into a table fsechist.
 
The issue is with the speed ( or lackof it) that the procedure proceeds. Apparently it is the subselects that are the worst issue and I have tried to replace these. Also max() and count() refuse to use indexes. I have replaced max() with (select ... as mx from ... order by ... desc limit 1) and this is much faster but appear to have to avoid count() as I can not find a way to point this at an index.
 
Please, I am sure that there is a better way to do this. And 5 to 7 minutes to insert a day is really too slow. Each days import table contains about 3200 records. Total table size for fsechist is about 2.5 million records.
 
 
If you could point me in the right direction here it would be much appreciated.
 
thanks
 
Richard Sydney-Smith
 
 
--------------------------------------------------------------------------------------------------------
 
CREATE OR REPLACE FUNCTION public.import_sharedata(varchar)
  RETURNS bool AS
'
 
declare
   filnam alias for $1;
   alldone bool := true;
   /* cnt integer := 0; */  /* count of number inserted not used */
 
begin
/*
   Imports a text file containing end of day price quotes
   filnam should be in double backslash format e.g. c:\\data\\sharequotes\\quotes.dat
 
   Ensures that it only brings in quotes for companies we have listed in our database
   and ensures only the latest copy of the quote is kept for each day
 
   Import table has 7 columns in CSV tab delimited ascii format.
      EzyChart format has prices in cents, 
      METASTOCK in dollars
      MetaStock +100 has the volume divided by 100.
  
   Depending on your import data source you may need to process the temp_shareprices so that the
   values are in Dollars , not cents and the volume is per unit not per 100 units.
 
   Present calculation is for EzyChart format.
 
   Both tables  have to have a 4 digit year yyyymmdd
 
   Warning : If you have the wrong format this procedure will import garbage into your database !!!
 
*/
 
  perform drop_table(\'temp_shareprices\');
 
  create table temp_shareprices(
    ticker char(10) null,
    dte char(8),
    v1 numeric(19,4) not null default 0,
    v2 numeric(19,4) not null default 0,
    v3 numeric(19,4) not null default 0,
    v4 numeric(19,4) not null default 0,
    v5 numeric(19,4) not null default 0
    );
  execute \'copy temp_shareprices from \'||quote_literal(filnam)||\' delimiter \'||quote_literal(\',\');
 

-- extend yr to 4 digits, alter quotes to dollars
 
  update temp_shareprices set dte = \'20\'||dte,v1=v1/100,v2=v2/100,v3=v3/100,v4=v4/100;
 
-- add in any new company tickers
insert into fsecurity (sec_cde) select ticker from temp_shareprices where length(trim(ticker))=3 and ticker not in ( select sec_cde from fsecurity);
 
-- ====================
-- remove rows from temp_shareprices where we do not have a corresponding security
-- using sub-select is slow
-- delete from temp_shareprices where ticker not in (select sec_cde from fsecurity);
-- can just delete ones where length<>3 as insert above ensures all length 3 are inserted. All ASX codes are 3char long
delete from temp_shareprices where length(trim(ticker))<>3;
 
-- Attempt to remove without limiting to length and not using sub select does not work
--update temp_shareprices set ticker = s.sec_cde from temp_shareprices t left outer join fsecurity s on t.ticker = s.sec_cde;
--delete from temp_shareprices where ticker is null;
-- ==================== 

-- put indexes on temp table
CREATE INDEX temphist_tick ON temp_shareprices  USING btree  (ticker);
CREATE INDEX temphist_dte  ON temp_shareprices  USING btree  (dte);
 
 
-- ==================== 
-- Scan the input table and delete as appropriate from fsechist
 
-- Using sub select to remove any pre-existing records in fsechist very slow
-- delete from fsechist where hist_q in (select hist_q from fsechist, temp_shareprices where hist_tick = ticker and dte = hist_date);
 
-- Tried to replace subselect
update fsechist set hist_tick = \'@@\' from fsechist h, temp_shareprices t where t.ticker = h.hist_tick and h.hist_date = t.dte;
delete from fsechist where hist_tick = \'@@\';
-- ==================== 
 
-- insert newvalues
insert into fsechist(hist_tick,hist_date,hist_v1,hist_v2,hist_v3,hist_v4,hist_vol) select   ticker,dte,v1,v2,v3,v4,v5 from temp_shareprices;
 
-- count the number inserted
-- select   count(*) into cnt from temp_shareprices;
 
-- perform  drop_table(\'temp_shareprices\');
 
return alldone;
 
end;'
  LANGUAGE 'plpgsql' VOLATILE;
 
 
Note : drop_table is a simple procedure that drops a procedure by name iff it exists.
 
==============================================
Definition of Fsechist
 
CREATE TABLE public.fsechist
(
  hist_q int4 NOT NULL DEFAULT nextval('hist_q'::text),
  hist_v1 numeric(19,4),
  hist_v2 numeric(19,4),
  hist_v3 numeric(19,4),
  hist_v4 numeric(19,4),
  hist_vol numeric(19,4),
  hist_deltar numeric(19,4),
  hist_deltag numeric(19,4),
  hist_date char(8),
  hist_tick char(10),
  hist_announce char(1),
  CONSTRAINT fsechist_pkey PRIMARY KEY (hist_q)
) WITH OIDS;
 
CREATE INDEX fsechist_date  ON public.fsechist  USING btree  (hist_date);
 
CREATE UNIQUE INDEX fsechist_q  ON public.fsechist  USING btree  (hist_q);
CREATE INDEX fsechist_tick  ON public.fsechist  USING btree  (hist_tick);

pgsql-sql by date:

Previous
From: Richard Huxton
Date:
Subject: Re: TIME ZONE SQL
Next
From: Josh Berkus
Date:
Subject: Re: Slow sub-selects, max and count(*)