Re: Connection Broken with Custom Dicts for TSearch2 - Mailing list pgsql-hackers

From Teodor Sigaev
Subject Re: Connection Broken with Custom Dicts for TSearch2
Date
Msg-id 4484548A.2070502@sigaev.ru
Whole thread Raw
In response to Re: Connection Broken with Custom Dicts for TSearch2  (Teodor Sigaev <teodor@sigaev.ru>)
Responses Re: Connection Broken with Custom Dicts for TSearch2  (Oleg Bartunov <oleg@sai.msu.su>)
List pgsql-hackers

Teodor Sigaev wrote:
> Sorry, it isn't mentioned on the page, but this code example works only
> with versions before 8.1. In 8.1 the dictionary interface was changed.

Try attached dict_tmpl.c

2Oleg: place file on site, pls

--
Teodor Sigaev                                   E-mail: teodor@sigaev.ru
                                                    WWW: http://www.sigaev.ru/
/*
 * num2english dictionary by Ben Chobot <bench@silentmedia.com>, based on
 * example of dictionary
 * Teodor Sigaev <teodor@sigaev.ru>
 *
 */
#include <errno.h>
#include <stdlib.h>
#include <string.h>

#include "postgres.h"

#include "dict.h"
#include "common.h"

#include "subinclude.h"

/*
 * One entry of the number-word table: an English word together with the
 * integer value it stands for.
 */
struct nx
{
    char    name[20];   /* NUL-terminated English word, e.g. "seventeen" */
    int     value;      /* numeric value denoted by name */
};

/*
 * Number words in ascending order of value.
 *
 * Entries 0..20 sit at the index equal to their value; cvt2() and cvt3()
 * index the table directly with a single digit.  After twenty only the
 * multiples of ten are listed.  The final entry (empty name, value 999) is a
 * sentinel that stops the linear scan in cvt2().
 */
static struct nx num2english_numarr[] =
{
    /* units */
    { "zero", 0 }, { "one", 1 }, { "two", 2 }, { "three", 3 }, { "four", 4 },
    { "five", 5 }, { "six", 6 }, { "seven", 7 }, { "eight", 8 }, { "nine", 9 },

    /* ten and the teens */
    { "ten", 10 }, { "eleven", 11 }, { "twelve", 12 }, { "thirteen", 13 },
    { "fourteen", 14 }, { "fifteen", 15 }, { "sixteen", 16 },
    { "seventeen", 17 }, { "eighteen", 18 }, { "nineteen", 19 },

    /* tens */
    { "twenty", 20 }, { "thirty", 30 }, { "forty", 40 }, { "fifty", 50 },
    { "sixty", 60 }, { "seventy", 70 }, { "eighty", 80 }, { "ninety", 90 },

    /* sentinel */
    { "", 999 }
};

/*
 * Scale words for successive groups of three decimal digits (short scale).
 *
 * Entry k names 1000^k: index 0 is the empty string for the lowest group,
 * index 1 is "thousand", index 2 is "million", and so on up to
 * "vigintillion" (1000^21).  itowords() indexes this table with the group
 * position, so the entries must stay contiguous and in order.
 */
static char *num2english_denom[]=
{
"",
"thousand",
"million",
"billion",
"trillion",
"quadrillion",
"quintillion",
"sextillion",
"septillion",
"octillion",
"nonillion",
"decillion",
"undecillion",
"duodecillion",
"tredecillion",
"quattuordecillion",
"quindecillion",    /* was missing, which shifted every later entry */
"sexdecillion",
"septendecillion",
"octodecillion",
"novemdecillion",
"vigintillion"
};


/*
 * Forward declarations of the number-to-words helpers defined at the end of
 * this file.  Each one returns a palloc'd string that the caller pfree's.
 */
static char *cvt2(int);             /* two-digit number -> words */
static char *cvt3(int);             /* three-digit number -> words */
static char *itowords(long long);   /* whole integer -> words */


 /*
  * dinit_num2english - init entry point of the num2english dictionary.
  *
  * There is nothing to set up, so the function simply hands back a NULL
  * pointer.
  */
 Datum dinit_num2english(PG_FUNCTION_ARGS);
 PG_FUNCTION_INFO_V1(dinit_num2english);

 Datum
 dinit_num2english(PG_FUNCTION_ARGS)
 {
     /* no state to build */
     PG_RETURN_POINTER(NULL);
 }

/* Return a palloc'd, NUL-terminated copy of the bytes in [from, upto). */
static char *
num2english_copy_span(const char *from, const char *upto)
{
    char   *copy = palloc((upto - from + 1));

    memcpy(copy, from, (upto - from));
    copy[upto - from] = '\0';
    return copy;
}

PG_FUNCTION_INFO_V1(dlexize_num2english);
Datum dlexize_num2english(PG_FUNCTION_ARGS);

/*
 * dlexize_num2english - turn a numeric token into its English words.
 *
 * Arguments (fmgr):
 *   0 - pointer argument, fetched but not used here
 *   1 - pointer to the token text
 *   2 - length of the token text
 *
 * The token is copied, parsed with atoll() and spelled out by itowords().
 * If the token contains a '.' followed by at least one more character, the
 * part after the dot is converted the same way and the two phrases are
 * joined as "<whole> . <fraction>".
 *
 * Returns a palloc'd TSLexeme array with one entry per word, terminated by
 * an entry whose lexeme is NULL.  An empty token yields an array holding
 * only that zeroed terminator.
 */
Datum
dlexize_num2english(PG_FUNCTION_ARGS)
{
    char       *in = (char *) PG_GETARG_POINTER(1);
    char       *txt = pnstrdup(in, PG_GETARG_INT32(2));
    TSLexeme   *res = 0;
    char       *words;
    char       *dot;
    char       *scan;
    char       *word_start;
    int         slots = 1;
    int         filled = 0;

    (void) PG_GETARG_POINTER(0);    /* first argument is not needed */

    /* empty token: hand back just the zeroed terminator entry */
    if (*txt == '\0')
    {
        res = palloc0(sizeof(TSLexeme));
        pfree(txt);
        PG_RETURN_POINTER(res);
    }

    words = itowords(atoll(txt));

    dot = strchr(txt, '.');
    if (dot && dot[1])
    {
        /* spell out the digits after the dot and join with " . " */
        char   *whole = words;
        char   *fraction = itowords(atoll(dot + 1));

        words = palloc(strlen(fraction) + strlen(whole) + strlen(" . ") + 1);
        sprintf(words, "%s . %s", whole, fraction);
        pfree(whole);
        pfree(fraction);
    }
    pfree(txt);

    /* every space may end a word, so spaces + 1 is an upper bound */
    for (scan = words; *scan; scan++)
    {
        if (*scan == ' ')
            slots++;
    }

    /* one extra zeroed slot for the terminator */
    res = palloc0(sizeof(TSLexeme) * (slots + 1));

    word_start = words;
    scan = words;
    while (*scan)
    {
        if (*scan == ' ')
        {
            res[filled].lexeme = num2english_copy_span(word_start, scan);
            filled++;

            /*
             * itowords() can leave two spaces between digit groups; skip
             * the second one.  There are never more than two in a row.
             */
            if (scan[1] == ' ')
                scan++;

            word_start = scan + 1;
        }
        scan++;
    }

    /* whatever follows the last space is the final word */
    res[filled].lexeme = num2english_copy_span(word_start, scan);
    filled++;

    pfree(words);
    res[filled].lexeme = NULL;

    PG_RETURN_POINTER(res);
}

/* The code below was taken from
http://h21007.www2.hp.com/dspp/tech/tech_TechDocumentDetailPage_IDX/1,1701,3556,00.html
 and modified slightly to fit into the postgres stored procedure framework.
 It appears to carry no copyright notice. */

/*
 * cvt2 - spell out a two-digit number.
 *
 * Finds the largest table entry not exceeding val, then appends the word for
 * whatever remains (e.g. 42 -> "forty two").  The result is a palloc'd
 * string owned by the caller.
 */
static char *cvt2(int val)
{
    char    buf[80];
    char   *out = 0;
    int     idx;
    int     rest;

    /* stop at the first entry above val; the 999 sentinel ends the scan */
    for (idx = 1; num2english_numarr[idx].value <= val; idx++)
        ;
    idx--;

    strcpy(buf, num2english_numarr[idx].name);

    /* what is left after the tens (or teen) word is a plain units index */
    rest = val - num2english_numarr[idx].value;
    if (rest > 0)
    {
        strcat(buf, " ");
        strcat(buf, num2english_numarr[rest].name);
    }

    out = palloc(strlen(buf) + 1);
    memcpy(out, buf, strlen(buf) + 1);
    return out;
}



/*
 * cvt3 - spell out a three-digit number.
 *
 * Emits "<digit> hundred" for the hundreds digit, if any, followed by the
 * cvt2() rendering of the last two digits, if they are non-zero.  The result
 * is a palloc'd string owned by the caller.
 */
static char *cvt3(int val)
{
    char    buf[80];
    char   *out = 0;
    int     hundreds = val / 100;
    int     below = val % 100;

    buf[0] = '\0';

    if (hundreds > 0)
    {
        strcat(buf, num2english_numarr[hundreds].name);
        /* add a separating space only when more words follow */
        strcat(buf, (below > 0) ? " hundred " : " hundred");
    }

    if (below > 0)
    {
        char   *tail = cvt2(below);

        strcat(buf, tail);
        pfree(tail);
    }

    out = palloc(strlen(buf) + 1);
    memcpy(out, buf, strlen(buf) + 1);
    return out;
}

/*
 * itowords - spell out a signed integer as English words.
 *
 * The value is split into groups of three decimal digits starting from the
 * least significant end.  Each non-zero group is rendered by cvt3() and
 * followed by its scale word from num2english_denom[]; the groups are then
 * assembled most significant first.  Negative values get a leading
 * "negative".
 *
 * Consecutive non-zero groups end up separated by TWO spaces (each phrase
 * ends in a space and a further one is prepended to the text built so far).
 * dlexize_num2english() expects and skips that doubled space, so the
 * spacing is preserved as is.
 *
 * Returns a palloc'd, NUL-terminated string owned by the caller.
 */
static char *itowords(long long val)
{
    unsigned long long mag;     /* magnitude still to be converted */
    int     tri;                /* current group of three digits, 0..999 */
    int     place = 0;          /* index of the current group's scale word */
    int     neg = 0;            /* set when val is negative */
    char    temp[255];          /* scratch copy of word while prepending */
    char    word[255];          /* result assembled so far */
    char    phrase[100];        /* words for the current group */
    char   *ret = 0;

    word[0] = '\0';

    if (val == 0)
    {
        ret = palloc(5);
        sprintf(ret, "zero");
        return ret;
    }

    /*
     * Take the absolute value in unsigned arithmetic.  Negating the most
     * negative long long as a signed value overflows, which is undefined
     * behaviour; the unsigned subtraction is well defined and yields the
     * correct magnitude for every input.
     */
    if (val < 0)
    {
        neg = 1;
        mag = 0ULL - (unsigned long long) val;
    }
    else
        mag = (unsigned long long) val;

    /* peel off groups of three digits, least significant first */
    while (mag > 0)
    {
        phrase[0] = '\0';
        tri = (int) (mag % 1000);   /* last three digits */
        mag /= 1000;                /* base 10 shift by 3 */

        if (tri > 0)
        {
            char   *sub = cvt3(tri);

            strcat(phrase, sub);
            pfree(sub);
            strcat(phrase, " ");

            /* the lowest group (place 0) carries no scale word */
            if (place > 0)
            {
                strcat(phrase, num2english_denom[place]);
                strcat(phrase, " ");
            }
        }
        place++;

        /* prepend this group's phrase to what has been built so far */
        strcpy(temp, word);
        if ((mag > 0) && (tri > 0))
        {
            /* more significant groups follow: lead with a separator */
            strcpy(word, " ");
            strcat(word, phrase);
        }
        else
            strcpy(word, phrase);

        strcat(word, temp);
    }

    if (neg)
    {
        strcpy(temp, word);
        strcpy(word, "negative ");
        strcat(word, temp);
    }

    /* every phrase ends with a space; drop the trailing one */
    word[strlen(word) - 1] = 0;

    ret = palloc(strlen(word) + 1);
    memcpy(ret, word, strlen(word) + 1);
    return ret;
}

pgsql-hackers by date:

Previous
From: "Jim C. Nasby"
Date:
Subject: Re: [PERFORM] psql -A (unaligned format) eats too much memory
Next
From: Tom Lane
Date:
Subject: Re: [PERFORM] psql -A (unaligned format) eats too much memory