Thread: how to use array with "holes" ?

how to use array with "holes" ?

From
Anton
Date:
Hi.

I want to use array for store some values (bytes_in and bytes_out) and
use array index as "id". But I got an errors...
My example function extract traf_id, bytes_in, bytes_out and try to
fill an array, like this

CREATE OR REPLACE FUNCTION myf_test() RETURNS void
AS $$
DECLARE
 p_tmp RECORD;
 p_last_cpnt RECORD;
 p_bytes_in bigint[];
 p_bytes_out bigint[];
 i int;
BEGIN
 SELECT * INTO p_last_cpnt FROM nn_cpnt WHERE account_id = 5 ORDER BY
date_time DESC, cpnt_id DESC LIMIT 1;
 IF FOUND THEN
  FOR p_tmp IN SELECT ttc_id, bytes_in, bytes_out FROM nn_cpnt_traf
WHERE cpnt_id = p_last_cpnt.cpnt_id ORDER BY ttc_id LOOP
   --RAISE NOTICE '[%] -> [%] [%]', p_tmp.ttc_id, p_tmp.bytes_in,
p_tmp.bytes_out;
   i := p_tmp.ttc_id;
   RAISE NOTICE 'i = [%]', i;
   p_bytes_in[i] := p_tmp.bytes_in;
   p_bytes_out[i] := p_tmp.bytes_out;
   RAISE NOTICE 'p_bytes_in[%] = [%] / p_bytes_out[%] = [%]', i,
p_bytes_in[i], i, p_bytes_out[i];
  END LOOP;
 END IF;
 -- ... some work. And I prepare to "INSERT INTO tbl" from my array.
So I iterate through my array (but now I just RAISE NOTICE here).
 FOR i IN COALESCE(array_lower(p_bytes_in,1),1) ..
COALESCE(array_upper(p_bytes_in,1),-1) LOOP
   RAISE NOTICE 'p_bytes_in[%] = [%] / p_bytes_out[%] = [%]', i,
p_bytes_in[i], i, p_bytes_out[i];
 END LOOP;
END;
$$
LANGUAGE plpgsql;

But
=# SELECT myf_test();
NOTICE:  i = [1]
NOTICE:  p_bytes_in[1] = [1907262814] / p_bytes_out[1] = [9308877139]
NOTICE:  i = [5]
ERROR:  invalid array subscripts
CONTEXT:  PL/pgSQL function "myf_test" line 14 at assignment

There are "hole" between 1 and 5, so I think that is the problem... I
try to use temporary table (truncate it always before computations),
but it seems slowly than arrays. I think arrays are less complicated
so they operate faster.
Please, help. Explain me how to use array?
--
engineer

Re: how to use array with "holes" ?

From
"Pavel Stehule"
Date:
hello,

you have to initialise array before using. Like:

declare a int[] = '{0,0,0,0,0, .................}';
begin
 a[10] := 11;

..

reason: older postgresql versions unsuported nulls in array

regards
Pavel

2007/5/31, Anton <anton200@gmail.com>:
> Hi.
>
> I want to use array for store some values (bytes_in and bytes_out) and
> use array index as "id". But I got an errors...
> My example function extract traf_id, bytes_in, bytes_out and try to
> fill an array, like this
>
> CREATE OR REPLACE FUNCTION myf_test() RETURNS void
> AS $$
> DECLARE
>  p_tmp RECORD;
>  p_last_cpnt RECORD;
>  p_bytes_in bigint[];
>  p_bytes_out bigint[];
>  i int;
> BEGIN
>  SELECT * INTO p_last_cpnt FROM nn_cpnt WHERE account_id = 5 ORDER BY
> date_time DESC, cpnt_id DESC LIMIT 1;
>  IF FOUND THEN
>   FOR p_tmp IN SELECT ttc_id, bytes_in, bytes_out FROM nn_cpnt_traf
> WHERE cpnt_id = p_last_cpnt.cpnt_id ORDER BY ttc_id LOOP
>    --RAISE NOTICE '[%] -> [%] [%]', p_tmp.ttc_id, p_tmp.bytes_in,
> p_tmp.bytes_out;
>    i := p_tmp.ttc_id;
>    RAISE NOTICE 'i = [%]', i;
>    p_bytes_in[i] := p_tmp.bytes_in;
>    p_bytes_out[i] := p_tmp.bytes_out;
>    RAISE NOTICE 'p_bytes_in[%] = [%] / p_bytes_out[%] = [%]', i,
> p_bytes_in[i], i, p_bytes_out[i];
>   END LOOP;
>  END IF;
>  -- ... some work. And I prepare to "INSERT INTO tbl" from my array.
> So I iterate through my array (but now I just RAISE NOTICE here).
>  FOR i IN COALESCE(array_lower(p_bytes_in,1),1) ..
> COALESCE(array_upper(p_bytes_in,1),-1) LOOP
>    RAISE NOTICE 'p_bytes_in[%] = [%] / p_bytes_out[%] = [%]', i,
> p_bytes_in[i], i, p_bytes_out[i];
>  END LOOP;
> END;
> $$
> LANGUAGE plpgsql;
>
> But
> =# SELECT myf_test();
> NOTICE:  i = [1]
> NOTICE:  p_bytes_in[1] = [1907262814] / p_bytes_out[1] = [9308877139]
> NOTICE:  i = [5]
> ERROR:  invalid array subscripts
> CONTEXT:  PL/pgSQL function "myf_test" line 14 at assignment
>
> There are "hole" between 1 and 5, so I think that is the problem... I
> try to use temporary table (truncate it always before computations),
> but it seems slowly than arrays. I think arrays are less complicated
> so they operate faster.
> Please, help. Explain me how to use array?
> --
> engineer
>
> ---------------------------(end of broadcast)---------------------------
> TIP 6: explain analyze is your friend
>

Re: how to use array with "holes" ?

From
Anton
Date:
> you have to initialise array before using. Like:
>
> declare a int[] = '{0,0,0,0,0, .................}';
> begin
>  a[10] := 11;

Ok, I got it, thanks! Now I can work with simle arrays.

May I ask some more complex? I want to use ONE multidimensial array -
the "id", "bytes_in" and "bytes_out". By another words, I need an
array, each element of which must contain 3 values: ttc_id, bytes_in,
bytes_out.

I think it can be done like this:

CREATE OR REPLACE FUNCTION myf_test() RETURNS void
AS $$
DECLARE
 p_tmp RECORD;
 p_last_cpnt RECORD;
 p_bytes bigint[][][][];
 i int;
BEGIN

 SELECT * INTO p_last_cpnt FROM nn_cpnt WHERE account_id = 5 ORDER BY
date_time DESC, cpnt_id DESC LIMIT 1;
 IF FOUND THEN
  FOR p_tmp IN SELECT ttc_id, bytes_in, bytes_out FROM nn_cpnt_traf
WHERE cpnt_id = p_last_cpnt.cpnt_id ORDER BY ttc_id LOOP
   --RAISE NOTICE 'ttc_id[%] -> in[%] out[%]', p_tmp.ttc_id,
p_tmp.bytes_in, p_tmp.bytes_out;
  -- get the next number for array index, ugly but works
   i = array_upper(p_bytes, 1) + 1;
   IF i IS NULL THEN
    i := 0;
   END IF;
   -- here I try to add element
   p_bytes[i] := ARRAY[p_tmp.ttc_id,p_tmp.bytes_in,p_tmp.bytes_out];
  END LOOP;
 END IF;
 -- ... some work. and here is the beauty of multidimensial. As I think...
 FOR i IN COALESCE(array_lower(p_bytes,1),1) ..
COALESCE(array_upper(p_bytes,1),-1) LOOP
   RAISE NOTICE 'p_bytes[%] = [%] / [%] / [%]', i, p_bytes[i][0],
p_bytes[i][1], p_bytes[i][2];
 END LOOP;
END;
$$
LANGUAGE plpgsql;

But I make an error somewhere, again...
=# SELECT myf_test();
ERROR:  invalid input syntax for integer: "{1,1907262814,9308877139}"
CONTEXT:  PL/pgSQL function "myf_test" line 18 at assignment

I think I not understand somewhat...
--
engineer

Re: how to use array with "holes" ?

From
"Pavel Stehule"
Date:
>
> May I ask some more complex? I want to use ONE multidimensial array -
> the "id", "bytes_in" and "bytes_out". By another words, I need an
> array, each element of which must contain 3 values: ttc_id, bytes_in,
> bytes_out.
>
> I think it can be done like this:
>

It's problem. You have to wait for 8.3 where composite types in arrays
are supported, or simply use three array variables (or use plperl or
plpython). Arrays in plpgsql can be slow, if you often update big
arrays.

Re: how to use array with "holes" ?

From
Anton
Date:
> > May I ask some more complex? I want to use ONE multidimensial array -
> > the "id", "bytes_in" and "bytes_out". By another words, I need an
> > array, each element of which must contain 3 values: ttc_id, bytes_in,
> > bytes_out.
> >
> > I think it can be done like this:
>
> It's problem. You have to wait for 8.3 where composite types in arrays
> are supported, or simply use three array variables (or use plperl or
> plpython). Arrays in plpgsql can be slow, if you often update big
> arrays.

Ok, got it, thanks for explanations. Actually I already use that.

If someone interesting here is the example.

initialisation:
 FOR p_tmp IN SELECT DISTINCT ON(ttc_id) ttc_id FROM ttc_ids
 LOOP
  -- get next value for index
  i = array_upper(p_ttc_ids, 1) + 1; IF i IS NULL THEN i := 0; END IF;
  --RAISE NOTICE '[%]', i;
  p_ttc_ids[i] := p_tmp.ttc_id;
  p_bytes_in[i] := 0;
  p_bytes_out[i] := 0;
 END LOOP;

example work:
 X_ttc_id := ...
 FOR i IN COALESCE(array_lower(p_ttc_ids,1),1) ..
COALESCE(array_upper(p_ttc_ids,1),-1)
 LOOP
  IF p_ttc_ids[i] = X_ttc_id THEN
   p_bytes_in[i] := p_bytes_in[i] + X_bytes_in;
   p_bytes_out[i] := p_bytes_out[i] + X_bytes_out;
  END IF;
 END LOOP;

It looks ugly but exactly for one of my functions (about 30...40
ttc_id's; function do very small computations like above; but started
for many rows) it is about 25% faster than use temporary table.
--
engineer

Re: how to use array with "holes" ?

From
"Pavel Stehule"
Date:
> initialisation:
>  FOR p_tmp IN SELECT DISTINCT ON(ttc_id) ttc_id FROM ttc_ids
>  LOOP
>   -- get next value for index
>   i = array_upper(p_ttc_ids, 1) + 1; IF i IS NULL THEN i := 0; END IF;
>   --RAISE NOTICE '[%]', i;
>   p_ttc_ids[i] := p_tmp.ttc_id;
>   p_bytes_in[i] := 0;
>   p_bytes_out[i] := 0;
>  END LOOP;
>

This isn't well style for plpgsql. It's slow for bigger arrays.

create or replace function filltest1(int) returns void as $$
declare a int[];
begin
  for i in 1..$1 loop
    a[i] := 0;
  end loop;
end; $$ language plpgsql;

-- with trick, its necessary for $1> 8000
create or replace function filltest2(int) returns void as $$
declare a int[];
begin
  a := case $1 when 0 then '{}' when 1 then '{0}' else
'{0'||repeat(',0', $1-1) || '}' end;
end; $$ language plpgsql;

filltest2 is more cryptographic , but is 10-20% faster and for
sizeof(a) > ~ 8000 is 30x faster. If you now max size of array you can
do:

declare a int[] = '{0,0,0,0..........}'; b int[] = '{....}'

every array update generates new version of array -> update is more
expensive than you can know from classic languages.