Thread: Certain query eating up all free memory (out of memory error)

Certain query eating up all free memory (out of memory error)

From
Łukasz Dejneka
Date:
Hi group,

I could really use your help with this one. I don't have all the
details right now (I can provide more descriptions tomorrow and logs
if needed), but maybe this will be enough:

I have written a PG (8.3.8) module, which uses Flex Lexical Analyser.
It takes text from database field and finds matches for defined rules.
It returns a set of two text fields (value found and value type).

When I run query like this:
SELECT * FROM flex_me(SELECT some_text FROM some_table WHERE id = 1);
It works perfectly fine. Memory never reaches more than 1% (usually
its below 0.5% of system mem).

But when I run query like this:
SELECT flex_me(some_text_field) FROM some_table WHERE id = 1;
Memory usage goes through the roof, and if the result is over about
10k matches (rows) it eats up all memory and I get "out of memory"
error.

I try to free all memory allocated, and even did a version with double
linked list of results but the same behaviour persists. I tried to
track it down on my own and from my own trials it seems that the
problem lies directly in the set returning function in File 2
"flex_me()" as even with 40k of results in a 2 column array it
shouldn't take more than 1MB of memory. Also when I run it just to the
point of SRF_IS_FIRSTCALL() (whole bit) the memory usage doesn't go
up, but when subsequent SRF_PERCALL calls are made it's where the
memory usage goes through the roof.

Btw, if the following code contains some nasty errors and I'm pretty
sure it does, please know that I'm just learning PG and C programming.
Any help or tips would be greatly appreciated.

Simplified (but still relevant) code below:

File 1 (Flex parser template which is compiled with flex):

%{
#include <stdio.h>

extern void *addToken(int type);
extern char ***flexme(char *ptr);

#define T_NUM  1
#define S_NUM  "number"
#define T_FLO  2
#define S_FLO  "float"
#define T_DAT  3
#define S_DAT  "date
#define T_WRD  7
#define S_WRD  "word"

char ***vals;

int cnt = 0, mem_cnt = 64;

%}

DGT          [0-9]
NUMBER       (-)?{DGT}+
FLOAT        ((-)?{DGT}+[\.,]{DGT}+)|{NUMBER}

DATE_S1      "-"
DATE_S2      ","
DATE_S3      "."
DATE_S4      "/"
DATE_S5      ""
DATE_YY      ([0-9]|([0-9][0-9])|([0-1][0-9][0-9][0-9])|(2[0-4][0-9][0-9]))
DATE_DD      ([1-9]|(([0-2][0-9])|(3[0-1])))
DATE_MM      ([1-9]|((0[1-9])|(1[0-2])))

DATE_YMD_S1  ({DATE_YY}{DATE_S1}{DATE_MM}{DATE_S1}{DATE_DD})
DATE_YMD_S2  ({DATE_YY}{DATE_S2}{DATE_MM}{DATE_S2}{DATE_DD})
DATE_YMD_S3  ({DATE_YY}{DATE_S3}{DATE_MM}{DATE_S3}{DATE_DD})
DATE_YMD_S4  ({DATE_YY}{DATE_S4}{DATE_MM}{DATE_S4}{DATE_DD})
DATE_YMD_S5  ({DATE_YY}{DATE_S5}{DATE_MM}{DATE_S5}{DATE_DD})
DATE_YMD     ({DATE_YMD_S1}|{DATE_YMD_S2}|{DATE_YMD_S3}|{DATE_YMD_S4}|{DATE_YMD_S5})

WORD         ([a-zA-Z0-9]+)

%%

{FLOAT}      addToken(T_FLO);

{DATE_YMD}   addToken(T_DAT);

{WORD}       addToken(T_WRD);

.|\n     /* eat up any unmatched character */

%%

void *
addToken(int type)
{
 int   x = 0;

//    elog(NOTICE,"W[%d] %s", type, yytext);

   //check if we need to add more mem
   if (mem_cnt-1 <= cnt) {
       mem_cnt *= 2;
       vals = repalloc(vals, mem_cnt * sizeof(char *));
//        elog(NOTICE, "mem increased to: %d", mem_cnt*sizeof(char *));
   }
   vals[cnt] = palloc(2 * sizeof(char *));

   //types
   switch (type) {
       case T_FLO:    //float
           x = strlen(S_FLO);
           vals[cnt][1] = palloc((x+1) * sizeof(char));
           strncpy(vals[cnt][1], S_FLO, x);
           vals[cnt][1][x] = '\0';
           break;
       case T_DAT:     //date
           x = strlen(S_DAT);
           vals[cnt][1] = palloc((x+1) * sizeof(char));
           strncpy(vals[cnt][1], S_DAT, x);
           vals[cnt][1][x] = '\0';
           break;
       case T_WRD:     //word
           x = strlen(S_WRD);
           vals[cnt][1] = palloc((x+1) * sizeof(char));
           strncpy(vals[cnt][1], S_WRD, x);
           vals[cnt][1][x] = '\0';
           break;
       default:
           elog(ERROR,"Unknown flexme type: %d", type);
           break;
   }
   //value
   vals[cnt][0] = palloc((yyleng+1) * sizeof(char));
   strncpy(vals[cnt][0], yytext, yyleng);
   vals[cnt][0][yyleng] = '\0';

   cnt++;
//    elog(NOTICE,"i: %d", cnt);

   return 0;
}

char ***flexme(char *ptr)
{

   YY_BUFFER_STATE bp;
   int   yyerr = 0;
   cnt = 0;

   //initial table size
   vals = palloc(mem_cnt * sizeof(char *));

   bp = yy_scan_string(ptr);
   yy_switch_to_buffer(bp);
   yyerr = yylex();
   yy_delete_buffer(bp);

   if (yyerr != 0) {
       elog(ERROR, "Flex parser error code: %d", yyerr);
   }

   return vals;
}



File 2 (PG function, which includes flex output analyser of compiled
File 1 - lex.yy.c):

#include "postgres.h"
#include "fmgr.h"
#include "funcapi.h"

#include "lex.yy.c"

char *text_to_cstring(const text *t);   //this is copied directly from
PG sources
char *
text_to_cstring(const text *t)
{
       /* must cast away the const, unfortunately */
       text           *tunpacked = pg_detoast_datum_packed((struct
varlena *) t);
       int                        len = VARSIZE_ANY_EXHDR(tunpacked);
       char           *result;

       result = (char *) palloc(len + 1);
       memcpy(result, VARDATA_ANY(tunpacked), len);
       result[len] = '\0';

       if (tunpacked != t)
               pfree(tunpacked);

       return result;
}


PG_FUNCTION_INFO_V1(flex_me);
Datum    flex_me(PG_FUNCTION_ARGS);

Datum
flex_me(PG_FUNCTION_ARGS) {
   text             *in = PG_GETARG_TEXT_P(0);

   FuncCallContext  *funcctx;
   TupleDesc        tupdesc;
   AttInMetadata    *attinmeta;
   int              call_cntr, max_calls;
   char             ***values;
   char             *ptr;

   // stuff done only on the first call of the function
   if (SRF_IS_FIRSTCALL()) {
       MemoryContext oldcontext;

       // create a function context for cross-call persistence
       funcctx = SRF_FIRSTCALL_INIT();

       // switch to memory context appropriate for multiple  function calls
       oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

       ptr = text_to_cstring_imm(in);
       values = flexme(ptr);

       //free char pointer
       pfree(ptr);

       // Build a tuple descriptor for our result type
       if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
           ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg
              ("function returning record called in context "
               "that cannot accept type record")));

       // generate attribute metadata needed later to produce
       //   tuples from raw C strings
       attinmeta = TupleDescGetAttInMetadata(tupdesc);
       funcctx->attinmeta = attinmeta;

       //pass first list element
       funcctx->user_fctx = values;

       // total number of tuples to be returned
       funcctx->max_calls = cnt;

       //go back to normal memory context
       MemoryContextSwitchTo(oldcontext);
   }

   // stuff done on every call of the function.
   funcctx = SRF_PERCALL_SETUP();
   call_cntr = funcctx->call_cntr;
   max_calls = funcctx->max_calls;
   attinmeta = funcctx->attinmeta;
   values = (char ***) funcctx->user_fctx;

   //set return routine
   if (call_cntr < max_calls) {
       char      **rvals;
       HeapTuple tuple;
       Datum     result;
       int       i;

       // Prepare a values array for building the returned
       //tuple. This should be an array of C strings which
       //will be processed later by the type input functions
       rvals = palloc(2*sizeof(char *));

       //value (text)
       i = strlen(values[call_cntr][0]);
       rvals[0] = palloc((i+1)*sizeof(char));
       strncpy(rvals[0], values[call_cntr][0], i);
       rvals[0][i] = '\0';

       //type (text)
       i = strlen(values[call_cntr][1]);
       rvals[1] = palloc((i+1)*sizeof(char));
       strncpy(rvals[1], values[call_cntr][1], i);
       rvals[1][i] = '\0';

       // build a tuple and make into datum.
       tuple = BuildTupleFromCStrings(attinmeta, rvals);

       result = HeapTupleGetDatum(tuple);


       //free memory
       pfree(rvals[0]);
       pfree(rvals[1]);
       pfree(rvals);
       pfree(values[call_cntr][0]);
       pfree(values[call_cntr][1]);
       pfree(values[call_cntr]);

       //return datum
       SRF_RETURN_NEXT(funcctx, result);
   }
   else {
       SRF_RETURN_DONE(funcctx);
   }

   return true;
}

Re: Certain query eating up all free memory (out of memory error)

From
Łukasz Dejneka
Date:
EXPLAIN ANALYSE on smaller query:
"Seq Scan on teksty  (cost=0.00..1353.50 rows=1 width=695) (actual
time=0.220..12.354 rows=368 loops=1)"
"  Filter: (id = 1)"
"Total runtime: 12.488 ms"


Memory config:

# - Memory -

shared_buffers = 24MB
temp_buffers = 8MB
max_prepared_transactions = 5
work_mem = 16MB                                # min 64kB
maintenance_work_mem = 16MB                # min 1MB
max_stack_depth = 2MB                        # min 100kB

# - Free Space Map -

max_fsm_pages = 153600
#max_fsm_relations = 1000

Memory info from logs:

TopMemoryContext: 49416 total in 6 blocks; 7680 free (8 chunks); 41736 used
  TopTransactionContext: 8192 total in 1 blocks; 7856 free (1 chunks); 336 used
  Type information cache: 8192 total in 1 blocks; 1800 free (0
chunks); 6392 used
  CFuncHash: 8192 total in 1 blocks; 4936 free (0 chunks); 3256 used
  MbProcContext: 1024 total in 1 blocks; 928 free (6 chunks); 96 used
  Operator class cache: 8192 total in 1 blocks; 3848 free (0 chunks); 4344 used
  Operator lookup cache: 24576 total in 2 blocks; 14072 free (6
chunks); 10504 used
  MessageContext: 8192 total in 1 blocks; 752 free (0 chunks); 7440 used
  smgr relation table: 8192 total in 1 blocks; 2808 free (0 chunks); 5384 used
  TransactionAbortContext: 32768 total in 1 blocks; 32752 free (0
chunks); 16 used
  Portal hash: 8192 total in 1 blocks; 3912 free (0 chunks); 4280 used
  PortalMemory: 8192 total in 1 blocks; 8040 free (0 chunks); 152 used
    PortalHeapMemory: 1024 total in 1 blocks; 880 free (0 chunks); 144 used
      ExecutorState: 516096 total in 6 blocks; 15368 free (7 chunks);
500728 used
        SRF multi-call context: 2499608 total in 276 blocks; 714136
free (38704 chunks); 1785472 used
        ExprContext: 3157941940 total in 12908 blocks; 505592 free (11
chunks); 3157436348 used
  Relcache by OID: 8192 total in 1 blocks; 3376 free (0 chunks); 4816 used
  CacheMemoryContext: 667472 total in 20 blocks; 239368 free (1
chunks); 428104 used
    pg_toast_150116_index: 1024 total in 1 blocks; 240 free (0 chunks); 784 used
    pg_database_datname_index: 1024 total in 1 blocks; 344 free (0
chunks); 680 used
    pg_index_indrelid_index: 1024 total in 1 blocks; 304 free (0
chunks); 720 used
    pg_ts_dict_oid_index: 1024 total in 1 blocks; 344 free (0 chunks); 680 used
    pg_aggregate_fnoid_index: 1024 total in 1 blocks; 344 free (0
chunks); 680 used
    pg_language_name_index: 1024 total in 1 blocks; 344 free (0
chunks); 680 used
    pg_statistic_relid_att_index: 1024 total in 1 blocks; 240 free (0
chunks); 784 used
    pg_ts_dict_dictname_index: 1024 total in 1 blocks; 280 free (0
chunks); 744 used
    pg_namespace_nspname_index: 1024 total in 1 blocks; 304 free (0
chunks); 720 used
    pg_opfamily_oid_index: 1024 total in 1 blocks; 344 free (0 chunks); 680 used
    pg_opclass_oid_index: 1024 total in 1 blocks; 304 free (0 chunks); 720 used
    pg_ts_parser_prsname_index: 1024 total in 1 blocks; 280 free (0
chunks); 744 used
    pg_amop_fam_strat_index: 1024 total in 1 blocks; 88 free (0
chunks); 936 used
    pg_opclass_am_name_nsp_index: 1024 total in 1 blocks; 192 free (0
chunks); 832 used
    pg_trigger_tgrelid_tgname_index: 1024 total in 1 blocks; 240 free
(0 chunks); 784 used
    pg_cast_source_target_index: 1024 total in 1 blocks; 240 free (0
chunks); 784 used
    pg_auth_members_role_member_index: 1024 total in 1 blocks; 280
free (0 chunks); 744 used
    pg_attribute_relid_attnum_index: 1024 total in 1 blocks; 240 free
(0 chunks); 784 used
    pg_ts_config_cfgname_index: 1024 total in 1 blocks; 280 free (0
chunks); 744 used
    pg_authid_oid_index: 1024 total in 1 blocks; 304 free (0 chunks); 720 used
    pg_ts_config_oid_index: 1024 total in 1 blocks; 344 free (0
chunks); 680 used
    pg_conversion_default_index: 1024 total in 1 blocks; 88 free (0
chunks); 936 used
    pg_language_oid_index: 1024 total in 1 blocks; 344 free (0 chunks); 680 used
    pg_enum_oid_index: 1024 total in 1 blocks; 344 free (0 chunks); 680 used
    pg_proc_proname_args_nsp_index: 1024 total in 1 blocks; 152 free
(0 chunks); 872 used
    pg_ts_parser_oid_index: 1024 total in 1 blocks; 344 free (0
chunks); 680 used
    pg_database_oid_index: 1024 total in 1 blocks; 304 free (0 chunks); 720 used
    pg_conversion_name_nsp_index: 1024 total in 1 blocks; 280 free (0
chunks); 744 used
    pg_class_relname_nsp_index: 1024 total in 1 blocks; 240 free (0
chunks); 784 used
    pg_attribute_relid_attnam_index: 1024 total in 1 blocks; 280 free
(0 chunks); 744 used
    pg_class_oid_index: 1024 total in 1 blocks; 304 free (0 chunks); 720 used
    pg_amproc_fam_proc_index: 1024 total in 1 blocks; 88 free (0
chunks); 936 used
    pg_operator_oprname_l_r_n_index: 1024 total in 1 blocks; 88 free
(0 chunks); 936 used
    pg_index_indexrelid_index: 1024 total in 1 blocks; 304 free (0
chunks); 720 used
    pg_type_oid_index: 1024 total in 1 blocks; 304 free (0 chunks); 720 used
    pg_rewrite_rel_rulename_index: 1024 total in 1 blocks; 280 free (0
chunks); 744 used
    pg_authid_rolname_index: 1024 total in 1 blocks; 304 free (0
chunks); 720 used
    pg_auth_members_member_role_index: 1024 total in 1 blocks; 280
free (0 chunks); 744 used
    pg_enum_typid_label_index: 1024 total in 1 blocks; 280 free (0
chunks); 744 used
    pg_constraint_oid_index: 1024 total in 1 blocks; 344 free (0
chunks); 680 used
    pg_conversion_oid_index: 1024 total in 1 blocks; 344 free (0
chunks); 680 used
    pg_ts_template_tmplname_index: 1024 total in 1 blocks; 280 free (0
chunks); 744 used
    pg_ts_config_map_index: 1024 total in 1 blocks; 192 free (0
chunks); 832 used
    pg_namespace_oid_index: 1024 total in 1 blocks; 344 free (0
chunks); 680 used
    pg_type_typname_nsp_index: 1024 total in 1 blocks; 280 free (0
chunks); 744 used
    pg_operator_oid_index: 1024 total in 1 blocks; 304 free (0 chunks); 720 used
    pg_amop_opr_fam_index: 1024 total in 1 blocks; 240 free (0 chunks); 784 used
    pg_proc_oid_index: 1024 total in 1 blocks; 304 free (0 chunks); 720 used
    pg_opfamily_am_name_nsp_index: 1024 total in 1 blocks; 192 free (0
chunks); 832 used
    pg_ts_template_oid_index: 1024 total in 1 blocks; 344 free (0
chunks); 680 used
  MdSmgr: 8192 total in 1 blocks; 7984 free (0 chunks); 208 used
  LOCALLOCK hash: 8192 total in 1 blocks; 3912 free (0 chunks); 4280 used
  Timezones: 48616 total in 2 blocks; 5968 free (0 chunks); 42648 used
  ErrorContext: 8192 total in 1 blocks; 8176 free (4 chunks); 16 used