Thread: PostgreSQL array, recursion and more

PostgreSQL array, recursion and more

From
Ferruccio Zamuner
Date:
MESH Data Tree:

example:
Hallux;A01.378.610.250.300.792.380

where:
A01 is Body Regions
A01.378 is Extremities
A01.378.610 is Lower Extremity
A01.378.610.250 is Foot
A01.378.610.250.300 is Forefoot, Human
A01.378.610.250.300.792 is Toes

CREATE OR REPLACE FUNCTION mesh_split(text) RETURNS text[]
AS $$
return [split('\.',$_[0])];
$$ LANGUAGE plperlu;

arancia=# select mesh_split('A01.378.610.250.300.792.380');
           mesh_split
-------------------------------
  {A01,378,610,250,300,792,380}
(1 row)


/*
    Is it a real array?
    If it is, why can I not use index to access its items?
  */

arancia=# select mesh_split('A01.378.610.250.300.792.380')[1];
ERROR:  syntax error at or near "["
LINE 1: select mesh_split('A01.378.610.250.300.792.380')[1];
                                                         ^
/*
     but it is an array, it behaves as it is.
  */
arancia=> select array_length(mesh_split('A01.378.610.250.300.792.380'),1);
  array_length
--------------
             7
(1 row)

/* How to get access to its items then?
  */


Another problem related:

arancia=> select * from meshtree where code = ANY
mesh_split('A01.378.610.250.300.792.380');
ERROR:  syntax error at or near "mesh_split"
LINE 1: select * from meshtree where code = ANY mesh_split('A01.378....
                                                 ^

select * from meshtree,
unnest(mesh_split('A01.378.610.250.300.792.380')) as c where
c=meshtree.code;
  parent |  id   | code |                            description

--------+-------+------+-------------------------------------------------------------------
      10 |    11 | 300  | Dehydroepiandrosterone Sulfate
      33 |    34 | 250  | Cymarine
      48 |    49 | 250  | Cymarine
      61 |    62 | 250  | Dihydrotachysterol
      66 |    68 | 300  | Calcitriol
      65 |    69 | 250  | Calcifediol
      92 |    93 | 380  | Glycodeoxycholic Acid
      98 |    99 | 250  | Finasteride
     111 |   117 | 300  | Chenodeoxycholic Acid
     145 |   146 | 300  | Dehydroepiandrosterone Sulfate
     180 |   182 | 250  | Ethinyl Estradiol-Norgestrel Combination
     190 |   191 | 250  | Desoximetasone
[..]
         | 18638 | A01  | Body Regions
[..]
     190 |   192 | 300  | Dexamethasone Isonicotinate
     195 |   196 | 250  | Clobetasol
     199 |   200 | 300  | Fluocinonide
     206 |   207 | 250  | Diflucortolone
     266 |   267 | 300  | Dexamethasone Isonicotinate
     281 |   282 | 250  | Diflucortolone
     290 |   293 | 250  | Dehydrocholesterols
     305 |   306 | 250  | Dihydrotachysterol
     312 |   314 | 300  | Calcitriol
     311 |   315 | 250  | Calcifediol
     320 |   321 | 250  | Cholestanol
     328 |   330 | 300  | Calcitriol
[..]
   52135 | 52136 | 250  | Eye Injuries
   52136 | 52137 | 250  | Eye Burns
   52149 | 52155 | 300  | Hematoma, Epidural, Cranial
   52181 | 52196 | 300  | Gallbladder Emptying
   52269 | 52277 | 300  | Caplan Syndrome
   52360 | 52368 | 300  | Caplan Syndrome
   52428 | 52442 | 380  | Hemothorax
   52476 | 52491 | 610  | Pneumonia
   52534 | 52535 | 380  | Legionnaires' Disease
(2204 rows)

I really want to write better similar query:

arancia=> with recursive t(id,parent,codeparts,idx,last,descriptions) as (
   SELECT
id, parent, mesh_split('A01.378.610.250.300.792.380'), 1,
array_length(mesh_split('A01.378.610.250.300.792.380'),1),
ARRAY[description]
     FROM meshtree WHERE code='A01'
     UNION ALL
   SELECT m.id, m.parent, t.codeparts, idx+1, last, descriptions ||
ARRAY[description]
     FROM meshtree AS m JOIN t ON (t.id=m.parent)
    WHERE idx<=last AND m.code=t.codeparts[idx+1])
  SELECT t.* FROM t;
   id   | parent |           codeparts           | idx | last |
                           descriptions
-------+--------+-------------------------------+-----+------+--------...
  18638 |        | {A01,378,610,250,300,792,380} |   1 |    7 | {"Body
Regions"}
  18675 |  18638 | {A01,378,610,250,300,792,380} |   2 |    7 | {"Body
Regions",Extremities}
  18676 |  18675 | {A01,378,610,250,300,792,380} |   3 |    7 | {"Body
Regions",Extremities,"Lower Extremity"}
  18679 |  18676 | {A01,378,610,250,300,792,380} |   4 |    7 | {"Body
Regions",Extremities,"Lower Extremity",Foot}
  18682 |  18679 | {A01,378,610,250,300,792,380} |   5 |    7 | {"Body
Regions",Extremities,"Lower Extremity",Foot,"Forefoot, Human"}
  18683 |  18682 | {A01,378,610,250,300,792,380} |   6 |    7 | {"Body
Regions",Extremities,"Lower Extremity",Foot,"Forefoot, Human",Toes}
  18684 |  18683 | {A01,378,610,250,300,792,380} |   7 |    7 | {"Body
Regions",Extremities,"Lower Extremity",Foot,"Forefoot, Human",Toes,Hallux}
(7 rows)

explain analyze with recursive
t(id,parent,codeparts,idx,last,descriptions) as (
   select

id,parent,mesh_split('A01.378.610.250.300.792.380'),1,array_length(mesh_split('A01.378.610.250.300.792.380'),1),ARRAY[description]

from meshtree where code='A01'
   union all
   select m.id,m.parent,t.codeparts,idx+1,last,descriptions ||
ARRAY[description] from meshtree as m join t on (t.id=m.parent) where
idx<=last and m.code=t.codeparts[idx+1]) select t.* from t;

QUERY PLAN


------------------------------------------------------------------------------------------------------------------------------------------------------
  CTE Scan on t  (cost=6336.53..6337.17 rows=32 width=80) (actual
time=4.850..9.453 rows=7 loops=1)
    CTE t
      ->  Recursive Union  (cost=0.00..6336.53 rows=32 width=99) (actual
time=4.839..9.397 rows=7 loops=1)
            ->  Index Scan using meshtree_id_code on meshtree
(cost=0.00..1030.38 rows=22 width=27) (actual time=4.828..8.895 rows=1
loops=1)
                  Index Cond: (code = 'A01'::text)
            ->  Nested Loop  (cost=0.00..530.55 rows=1 width=99) (actual
time=0.051..0.061 rows=1 loops=7)
                  ->  WorkTable Scan on t  (cost=0.00..4.95 rows=73
width=76) (actual time=0.005..0.008 rows=1 loops=7)
                        Filter: (idx <= last)
                  ->  Index Scan using meshtree_parent_code on meshtree
m  (cost=0.00..7.18 rows=1 width=31) (actual time=0.031..0.034 rows=1
loops=7)
                        Index Cond: ((m.parent = t.id) AND (m.code =
t.codeparts[(t.idx + 1)]))
  Total runtime: 9.758 ms
(11 rows)


PostgreSQL rocks!


Thank you in advance,              \ferz

Attachment

Re: PostgreSQL array, recursion and more

From
Achilleas Mantzios
Date:
Στις Wednesday 17 November 2010 15:22:34 ο/η Ferruccio Zamuner έγραψε:
>
> /* How to get access to its items then?
>   */

select (mesh_split('A01.378.610.250.300.792.380')::text[])[1];


--
Achilleas Mantzios


Re: PostgreSQL array, recursion and more

From
Dmitriy Igrishin
Date:
Hey Ferruccio,<br /><br />Also consider ltree contrib module as alternative.<br /><a
href="http://www.postgresql.org/docs/9.0/static/ltree.html">http://www.postgresql.org/docs/9.0/static/ltree.html</a><br
/><br/>;-)<br /><br /><div class="gmail_quote"> 2010/11/17 Ferruccio Zamuner <span dir="ltr"><<a
href="mailto:nonsolosoft@diff.org">nonsolosoft@diff.org</a>></span><br/><blockquote class="gmail_quote"
style="margin:0pt 0pt 0pt 0.8ex; border-left: 1px solid rgb(204, 204, 204); padding-left: 1ex;"> MESH Data Tree:<br
/><br/> example:<br /> Hallux;A01.378.610.250.300.792.380<br /><br /> where:<br /> A01 is Body Regions<br /> A01.378 is
Extremities<br/> A01.378.610 is Lower Extremity<br /> A01.378.610.250 is Foot<br /> A01.378.610.250.300 is Forefoot,
Human<br/> A01.378.610.250.300.792 is Toes<br /><br /> CREATE OR REPLACE FUNCTION mesh_split(text) RETURNS text[]<br />
AS$$<br /> return [split('\.',$_[0])];<br /> $$ LANGUAGE plperlu;<br /><br /> arancia=# select
mesh_split('A01.378.610.250.300.792.380');<br/>          mesh_split<br /> -------------------------------<br />
 {A01,378,610,250,300,792,380}<br/> (1 row)<br /><br /><br /> /*<br />   Is it a real array?<br />   If it is, why can
Inot use index to access its items?<br />  */<br /><br /> arancia=# select
mesh_split('A01.378.610.250.300.792.380')[1];<br/> ERROR:  syntax error at or near "["<br /> LINE 1: select
mesh_split('A01.378.610.250.300.792.380')[1];<br/>                                                        ^<br /> /*<br
/>   but it is an array, it behaves as it is.<br />  */<br /> arancia=> select
array_length(mesh_split('A01.378.610.250.300.792.380'),1);<br/>  array_length<br /> --------------<br />          
 7<br/> (1 row)<br /><br /> /* How to get access to its items then?<br />  */<br /><br /><br /> Another problem
related:<br/><br /> arancia=> select * from meshtree where code = ANY mesh_split('A01.378.610.250.300.792.380');<br
/>ERROR:  syntax error at or near "mesh_split"<br /> LINE 1: select * from meshtree where code = ANY
mesh_split('A01.378....<br/>                                                ^<br /><br /> select * from meshtree,
unnest(mesh_split('A01.378.610.250.300.792.380'))as c where c=meshtree.code;<br />  parent |  id   | code |            
              description <br />
--------+-------+------+-------------------------------------------------------------------<br/>     10 |    11 | 300
 |Dehydroepiandrosterone Sulfate<br />     33 |    34 | 250  | Cymarine<br />     48 |    49 | 250  | Cymarine<br />  
 61 |    62 | 250  | Dihydrotachysterol<br />     66 |    68 | 300  | Calcitriol<br />     65 |    69 | 250  |
Calcifediol<br/>     92 |    93 | 380  | Glycodeoxycholic Acid<br />     98 |    99 | 250  | Finasteride<br />    111 |
 117 | 300  | Chenodeoxycholic Acid<br />    145 |   146 | 300  | Dehydroepiandrosterone Sulfate<br />    180 |   182 |
250 | Ethinyl Estradiol-Norgestrel Combination<br />    190 |   191 | 250  | Desoximetasone<br /> [..]<br />        |
18638| A01  | Body Regions<br /> [..]<br />    190 |   192 | 300  | Dexamethasone Isonicotinate<br />    195 |   196 |
250 | Clobetasol<br />    199 |   200 | 300  | Fluocinonide<br />    206 |   207 | 250  | Diflucortolone<br />    266 |
 267 | 300  | Dexamethasone Isonicotinate<br />    281 |   282 | 250  | Diflucortolone<br />    290 |   293 | 250  |
Dehydrocholesterols<br/>    305 |   306 | 250  | Dihydrotachysterol<br />    312 |   314 | 300  | Calcitriol<br />  
 311|   315 | 250  | Calcifediol<br />    320 |   321 | 250  | Cholestanol<br />    328 |   330 | 300  | Calcitriol<br
/>[..]<br />  52135 | 52136 | 250  | Eye Injuries<br />  52136 | 52137 | 250  | Eye Burns<br />  52149 | 52155 | 300  |
Hematoma,Epidural, Cranial<br />  52181 | 52196 | 300  | Gallbladder Emptying<br />  52269 | 52277 | 300  | Caplan
Syndrome<br/>  52360 | 52368 | 300  | Caplan Syndrome<br />  52428 | 52442 | 380  | Hemothorax<br />  52476 | 52491 |
610 | Pneumonia<br />  52534 | 52535 | 380  | Legionnaires' Disease<br /> (2204 rows)<br /><br /> I really want to
writebetter similar query:<br /><br /> arancia=> with recursive t(id,parent,codeparts,idx,last,descriptions) as (<br
/> SELECT<br /> id, parent, mesh_split('A01.378.610.250.300.792.380'), 1,
array_length(mesh_split('A01.378.610.250.300.792.380'),1),ARRAY[description]<br />    FROM meshtree WHERE code='A01'<br
/>   UNION ALL<br />  SELECT <a href="http://m.id" target="_blank">m.id</a>, m.parent, t.codeparts, idx+1, last,
descriptions|| ARRAY[description]<br />    FROM meshtree AS m JOIN t ON (<a href="http://t.id"
target="_blank">t.id</a>=m.parent)<br/>   WHERE idx<=last AND m.code=t.codeparts[idx+1])<br />  SELECT t.* FROM
t;<br/>  id   | parent |           codeparts           | idx | last |                           descriptions<br />
-------+--------+-------------------------------+-----+------+--------...<br/>  18638 |        |
{A01,378,610,250,300,792,380}|   1 |    7 | {"Body Regions"}<br />  18675 |  18638 | {A01,378,610,250,300,792,380} |  
2|    7 | {"Body Regions",Extremities}<br />  18676 |  18675 | {A01,378,610,250,300,792,380} |   3 |    7 | {"Body
Regions",Extremities,"LowerExtremity"}<br />  18679 |  18676 | {A01,378,610,250,300,792,380} |   4 |    7 | {"Body
Regions",Extremities,"LowerExtremity",Foot}<br />  18682 |  18679 | {A01,378,610,250,300,792,380} |   5 |    7 | {"Body
Regions",Extremities,"LowerExtremity",Foot,"Forefoot, Human"}<br />  18683 |  18682 | {A01,378,610,250,300,792,380} |  
6|    7 | {"Body Regions",Extremities,"Lower Extremity",Foot,"Forefoot, Human",Toes}<br />  18684 |  18683 |
{A01,378,610,250,300,792,380}|   7 |    7 | {"Body Regions",Extremities,"Lower Extremity",Foot,"Forefoot,
Human",Toes,Hallux}<br/> (7 rows)<br /><br /> explain analyze with recursive
t(id,parent,codeparts,idx,last,descriptions)as (<br />  select
id,parent,mesh_split('A01.378.610.250.300.792.380'),1,array_length(mesh_split('A01.378.610.250.300.792.380'),1),ARRAY[description]
frommeshtree where code='A01'<br />  union all<br />  select <a href="http://m.id"
target="_blank">m.id</a>,m.parent,t.codeparts,idx+1,last,descriptions|| ARRAY[description] from meshtree as m join t on
(<ahref="http://t.id" target="_blank">t.id</a>=m.parent) where idx<=last and m.code=t.codeparts[idx+1]) select t.*
fromt;<br /><br /> QUERY PLAN <br />
------------------------------------------------------------------------------------------------------------------------------------------------------<br
/> CTE Scan on t  (cost=6336.53..6337.17 rows=32 width=80) (actual time=4.850..9.453 rows=7 loops=1)<br />   CTE t<br
/>    ->  Recursive Union  (cost=0.00..6336.53 rows=32 width=99) (actual time=4.839..9.397 rows=7 loops=1)<br />    
     ->  Index Scan using meshtree_id_code on meshtree (cost=0.00..1030.38 rows=22 width=27) (actual
time=4.828..8.895rows=1 loops=1)<br />                 Index Cond: (code = 'A01'::text)<br />           ->  Nested
Loop (cost=0.00..530.55 rows=1 width=99) (actual time=0.051..0.061 rows=1 loops=7)<br />                 ->
 WorkTableScan on t  (cost=0.00..4.95 rows=73 width=76) (actual time=0.005..0.008 rows=1 loops=7)<br />                
     Filter: (idx <= last)<br />                 ->  Index Scan using meshtree_parent_code on meshtree m
 (cost=0.00..7.18rows=1 width=31) (actual time=0.031..0.034 rows=1 loops=7)<br />                       Index Cond:
((m.parent= <a href="http://t.id" target="_blank">t.id</a>) AND (m.code = t.codeparts[(t.idx + 1)]))<br />  Total
runtime:9.758 ms<br /> (11 rows)<br /><br /><br /> PostgreSQL rocks!<br /><br /><br /> Thank you in advance,          
  \ferz<br /><br /><br /> --<br /> Sent via pgsql-sql mailing list (<a
href="mailto:pgsql-sql@postgresql.org">pgsql-sql@postgresql.org</a>)<br/> To make changes to your subscription:<br /><a
href="http://www.postgresql.org/mailpref/pgsql-sql"target="_blank">http://www.postgresql.org/mailpref/pgsql-sql</a><br
/><br/></blockquote></div><br /><br clear="all" /><br />-- <br />// Dmitriy.<br /><br /><br />