Thread: substring extraction

substring extraction

From
Karel Zak - Zakkr
Date:
Hi,

I need in the SELECT query extract substring 'cccc' from string 
'aaa.bbbbb.cccc.dd.eee' (extract third field from string if 
delimiter is '.').

It is easy if I know where is begin/end of 'cccc' and I can 
use the substring() function:

select substring('aaa.bbbbb.cccc.dd.eee' from 11 for 4);
substr
------
cccc

But how extract it if I don't know where is position of the second 
and third '.'? 

Yes, I know the function position() or textpos(), but this return first 
a position of the substring...

For this exist nice UN*X command "cut -f3 -d." , but how make it in 
SQL?

I ask about it, because I write for me this as new function in C, but 
I'm not sure if not exist other (better) way for it.
                    Karel


------------------------------------------------------------------------------
Karel Zak <zakkr@zf.jcu.cz>                      http://home.zf.jcu.cz/~zakkr/

Docs:         http://docs.linux.cz                          (big docs archive)    
Kim Project:  http://home.zf.jcu.cz/~zakkr/kim/              (process manager)
FTP:          ftp://ftp2.zf.jcu.cz/users/zakkr/              (C/ncurses/PgSQL)
------------------------------------------------------------------------------



Re: [HACKERS] substring extraction

From
jose soares
Date:
Try this:

--returns the $2 field delimited by $3
drop function field(text,int,text);
create function field(text,int,text) returns text as
'declare       string text;       pos int2:= 0;       pos1 int2:= 0;       times int2:= 0;       totpos int2:= 0;
begin       times:= $2 - 1;       string:= $1;       while totpos < times loop               string:=
substr(string,pos+1);              pos:= strpos(string,$3);               totpos:= totpos + 1;       end loop;
string:=substr(string,pos+1);       pos1:= strpos(string,$3);       return substr(string,1,pos1 - 1);end;
 
' language 'plpgsql';

select field('primo.secondo.terzo',1,'.');
field
-----
primo
(1 row)

select field('primo.secondo.terzo',2,'.');
field
-------
secondo
(1 row)

select field('primo.secondo.terzo',3,'.');
field
-----
terzo
(1 row)


José



Karel Zak - Zakkr ha scritto:

> Hi,
>
> I need in the SELECT query extract substring 'cccc' from string
> 'aaa.bbbbb.cccc.dd.eee' (extract third field from string if
> delimiter is '.').
>
> It is easy if I know where is begin/end of 'cccc' and I can
> use the substring() function:
>
> select substring('aaa.bbbbb.cccc.dd.eee' from 11 for 4);
> substr
> ------
> cccc
>
> But how extract it if I don't know where is position of the second
> and third '.'?
>
> Yes, I know the function position() or textpos(), but this return first
> a position of the substring...
>
> For this exist nice UN*X command "cut -f3 -d." , but how make it in
> SQL?
>
> I ask about it, because I write for me this as new function in C, but
> I'm not sure if not exist other (better) way for it.
>
>                                                 Karel
>
> ------------------------------------------------------------------------------
> Karel Zak <zakkr@zf.jcu.cz>                      http://home.zf.jcu.cz/~zakkr/
>
> Docs:         http://docs.linux.cz                          (big docs archive)
> Kim Project:  http://home.zf.jcu.cz/~zakkr/kim/              (process manager)
> FTP:          ftp://ftp2.zf.jcu.cz/users/zakkr/              (C/ncurses/PgSQL)
> ------------------------------------------------------------------------------
>
> ************



Re: [HACKERS] substring extraction

From
Karel Zak - Zakkr
Date:

On Fri, 26 Nov 1999, jose soares wrote:

> Try this:
> 
> --returns the $2 field delimited by $3
> drop function field(text,int,text);
> create function field(text,int,text) returns text as
> 'declare
>         string text;
>         pos int2:= 0;
>         pos1 int2:= 0;
>         times int2:= 0;
>         totpos int2:= 0;
> begin
>         times:= $2 - 1;
>         string:= $1;
>         while totpos < times loop
>                 string:= substr(string,pos+1);
>                 pos:= strpos(string,$3);
>                 totpos:= totpos + 1;
>         end loop;
>         string:= substr(string,pos+1);
>         pos1:= strpos(string,$3);
>         return substr(string,1,pos1 - 1);
>  end;
> ' language 'plpgsql';
> 
Oh, it is great! But my implementation in C for this is 
a little longer (only) :-) 

I send this question to the hacker list because "extract delimited 
substring" is not a abnormal uses's request, and (IMHO) will very 
good if this will in PgSQL. How much uses known write this in 
C or any PL? 

'C' implementafion "extract delimited substring":
-----------------------------------------------
text
*strcut( text *string, char *d, int field )
{char    *ptr     = NULL,     *p     = NULL,    *pe     = NULL;text    *result = NULL;int    siz;ptr  =
VARDATA(string);*(ptr+(VARSIZE(string) - VARHDRSZ)) = '\0';for(p = ptr; *p != '\0'; p++) {    if (field == 1)
break;   if (*p == (int) d)        --field;}    if (!*p)    return textin("");    for(pe = p; *pe != '\0'; pe++) {
if(*pe == (int) d)        break;}    result = (text *) palloc(sizeof(text) * (siz = pe - p) +
VARHDRSZ);strncpy(VARDATA(result),p, siz);*(VARDATA(result) + siz) = '\0';     VARSIZE(result) = siz + VARHDRSZ;return
result;
}

CREATE FUNCTION         strcut(text, char, int)       RETURNS         text       AS              '@module_dir@'
LANGUAGE       'c'; 
 

template1=> select strcut('aaa.bbb.ccc', '.', 2);
strcut
------
bbb

                    Karel



Re: [HACKERS] substring extraction

From
wieck@debis.com (Jan Wieck)
Date:
Karel Zak wrote:

> On Fri, 26 Nov 1999, jose soares wrote:
>
> > Try this:
> >
> > --returns the $2 field delimited by $3
> > drop function field(text,int,text);
> > create function field(text,int,text) returns text as
> > 'declare
> >         string text;
> >         pos int2:= 0;
> >         pos1 int2:= 0;
> >         times int2:= 0;
> >         totpos int2:= 0;
> > begin
> >         times:= $2 - 1;
> >         string:= $1;
> >         while totpos < times loop
> >                 string:= substr(string,pos+1);
> >                 pos:= strpos(string,$3);
> >                 totpos:= totpos + 1;
> >         end loop;
> >         string:= substr(string,pos+1);
> >         pos1:= strpos(string,$3);
> >         return substr(string,1,pos1 - 1);
> >  end;
> > ' language 'plpgsql';
> >
>
>  Oh, it is great! But my implementation in C for this is
> a little longer (only) :-)
>
> I send this question to the hacker list because "extract delimited
> substring" is not a abnormal uses's request, and (IMHO) will very
> good if this will in PgSQL. How much uses known write this in
> C or any PL?

    What about this one:

        create function field(text,int,text) returns text as '
            return [lindex [split $1 $3] $2]
        ' language 'pltcl';

    It  does  all  the  work  as  long as the third argument is a
    single  character.  For  multibyte  delimiters  it  will   be
    slightly  bigger,  but  not  much.  Now you might imagine why
    PL/Tcl was the first language I created.


Jan

--

#======================================================================#
# It's easier to get forgiveness for being wrong than for being right. #
# Let's break this rule - forgive me.                                  #
#========================================= wieck@debis.com (Jan Wieck) #