Re: BUG #17557: ts_headline will error with "invalid memory alloc request size" for large documents - Mailing list pgsql-bugs

From Japin Li
Subject Re: BUG #17557: ts_headline will error with "invalid memory alloc request size" for large documents
Date
Msg-id MEYP282MB16692B665807A13C58E84BB9B6939@MEYP282MB1669.AUSP282.PROD.OUTLOOK.COM
Whole thread Raw
In response to BUG #17557: ts_headline will error with "invalid memory alloc request size" for large documents  (PG Bug reporting form <noreply@postgresql.org>)
List pgsql-bugs
On Fri, 22 Jul 2022 at 23:39, PG Bug reporting form <noreply@postgresql.org> wrote:
> The following bug has been logged on the website:
>
> Bug reference:      17557
> Logged by:          Alex Malek
> Email address:      magicagent@gmail.com
> PostgreSQL version: 14.4
> Operating system:   Red Hat
> Description:
>
> ts_headline when given a document over a certain size/number of words will
> cause "ERROR:  invalid memory alloc request size XXXXXX"
>
> # select ts_headline('b ' || repeat('1 ',16777215), $$'b'$$::tsquery,
> 'MaxWords=4, MinWords=3') ;
> ERROR:  invalid memory alloc request size 1610612736
>
> Not just related to document size but also to number of "words" in a
> document:
>
> One less "word" works:
>
> select ts_headline('b ' || repeat('1 ',16777214), $$'b'$$::tsquery,
> 'MaxWords=4, MinWords=3') ;
>   ts_headline
> ----------------
>  <b>b</b> 1 1 1
> (1 row)
>
> Mem not an issue for larger "words" up to a point:
>
> # select ts_headline('b ' || repeat('123456789012345 ',16777214),
> $$'b'$$::tsquery, 'MaxWords=4, MinWords=3') ;
>                        ts_headline
> ----------------------------------------------------------
>  <b>b</b> 123456789012345 123456789012345 123456789012345
> (1 row)
>
> # select ts_headline('b ' || repeat('1234567890123456 ',16777214),
> $$'b'$$::tsquery, 'MaxWords=4, MinWords=3') ;
> ERROR:  invalid memory alloc request size 1140850564
>
> Memory issue appears to be triggered based on total number of words and word
> length
>
> # select ts_headline('b ' || repeat('1234567890123456 ',15790000),
> $$'b'$$::tsquery, 'MaxWords=4, MinWords=3') ;
>                          ts_headline
> -------------------------------------------------------------
>  <b>b</b> 1234567890123456 1234567890123456 1234567890123456
> (1 row)
>
> # select ts_headline('b ' || repeat('1234567890123456 ',15795000),
> $$'b'$$::tsquery, 'MaxWords=4, MinWords=3') ;
> ERROR:  invalid memory alloc request size 1074060012
>
>
> I get the same results even when increasing psql GUCs including work_mem,
> shared_buffers  and effective_cache_size
> Also on machines w/ significantly more RAM, with and w/o HugePages enabled.

This appears to be a limitation of the repalloc() function [1].  Here is the backtrace:

#0  repalloc (pointer=0x7f3868312050, size=1610612736) at
/mnt/workspace/postgresql/build/../src/backend/utils/mmgr/mcxt.c:1190
#1  0x000055f11dcee750 in hlfinditem (prs=0x7ffc5c5ac580, query=0x55f12016f408, pos=16777216, buf=0x55f160e51bf0 "1",
buflen=1) at /mnt/workspace/postgresql/build/../src/backend/tsearch/ts_parse.c:462
 
#2  0x000055f11dcee9b4 in addHLParsedLex (prs=0x7ffc5c5ac580, query=0x55f12016f408, lexs=0x55f12023e9a0,
norms=0x55f12023e928) at /mnt/workspace/postgresql/build/../src/backend/tsearch/ts_parse.c:505
 
#3  0x000055f11dceec2c in hlparsetext (cfgId=12360, prs=0x7ffc5c5ac580, query=0x55f12016f408,
    buf=0x7f38ceb19054 "b 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
11 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 "...,
buflen=33554432) at /mnt/workspace/postgresql/build/../src/backend/tsearch/ts_parse.c:582
 
#4  0x000055f11dcf1c2b in ts_headline_byid_opt (fcinfo=0x7ffc5c5ac610) at
/mnt/workspace/postgresql/build/../src/backend/tsearch/wparser.c:319
#5  0x000055f11de9f57b in DirectFunctionCall4Coll (func=0x55f11dcf1a0b <ts_headline_byid_opt>, collation=0, arg1=12360,
arg2=139881962639440, arg3=94494113854472, arg4=94494113855272)
 
    at /mnt/workspace/postgresql/build/../src/backend/utils/fmgr/fmgr.c:861
#6  0x000055f11dcf1e02 in ts_headline_opt (fcinfo=0x55f12023cb48) at
/mnt/workspace/postgresql/build/../src/backend/tsearch/wparser.c:366
#7  0x000055f11da28543 in ExecInterpExpr (state=0x55f12023ca60, econtext=0x55f12023c760, isnull=0x7ffc5c5ac96f) at
/mnt/workspace/postgresql/build/../src/backend/executor/execExprInterp.c:763
#8  0x000055f11da2a6be in ExecInterpExprStillValid (state=0x55f12023ca60, econtext=0x55f12023c760,
isNull=0x7ffc5c5ac96f) at /mnt/workspace/postgresql/build/../src/backend/executor/execExprInterp.c:1858
 
#9  0x000055f11da86710 in ExecEvalExprSwitchContext (state=0x55f12023ca60, econtext=0x55f12023c760,
isNull=0x7ffc5c5ac96f) at /mnt/workspace/postgresql/build/../src/include/executor/executor.h:343
 
#10 0x000055f11da86788 in ExecProject (projInfo=0x55f12023ca58) at
/mnt/workspace/postgresql/build/../src/include/executor/executor.h:377
#11 0x000055f11da869b9 in ExecResult (pstate=0x55f12023c648) at
/mnt/workspace/postgresql/build/../src/backend/executor/nodeResult.c:136
#12 0x000055f11da41e17 in ExecProcNodeFirst (node=0x55f12023c648) at
/mnt/workspace/postgresql/build/../src/backend/executor/execProcnode.c:463
#13 0x000055f11da356a1 in ExecProcNode (node=0x55f12023c648) at
/mnt/workspace/postgresql/build/../src/include/executor/executor.h:259
#14 0x000055f11da38318 in ExecutePlan (estate=0x55f12023c410, planstate=0x55f12023c648, use_parallel_mode=false,
operation=CMD_SELECT, sendTuples=true, numberTuples=0, direction=ForwardScanDirection,
 
    dest=0x55f120237198, execute_once=true) at /mnt/workspace/postgresql/build/../src/backend/executor/execMain.c:1636
#15 0x000055f11da35d6b in standard_ExecutorRun (queryDesc=0x55f12018e800, direction=ForwardScanDirection, count=0,
execute_once=true)
    at /mnt/workspace/postgresql/build/../src/backend/executor/execMain.c:363
#16 0x000055f11da35b81 in ExecutorRun (queryDesc=0x55f12018e800, direction=ForwardScanDirection, count=0,
execute_once=true) at /mnt/workspace/postgresql/build/../src/backend/executor/execMain.c:307
 
#17 0x000055f11dcda8d3 in PortalRunSelect (portal=0x55f1201da030, forward=true, count=0, dest=0x55f120237198) at
/mnt/workspace/postgresql/build/../src/backend/tcop/pquery.c:924
#18 0x000055f11dcda50a in PortalRun (portal=0x55f1201da030, count=9223372036854775807, isTopLevel=true, run_once=true,
dest=0x55f120237198, altdest=0x55f120237198, qc=0x7ffc5c5acd70)
 
    at /mnt/workspace/postgresql/build/../src/backend/tcop/pquery.c:768
#19 0x000055f11dcd392c in exec_simple_query (query_string=0x55f12016d970 "select ts_headline('b ' || repeat('1
',16777215),$$'b'$$::tsquery, 'MaxWords=4, MinWords=3') ;")
 
    at /mnt/workspace/postgresql/build/../src/backend/tcop/postgres.c:1243
#20 0x000055f11dcd83ef in PostgresMain (dbname=0x55f120197ea8 "postgres", username=0x55f120197e88 "japin") at
/mnt/workspace/postgresql/build/../src/backend/tcop/postgres.c:4505
#21 0x000055f11dc000c2 in BackendRun (port=0x55f1201907c0) at
/mnt/workspace/postgresql/build/../src/backend/postmaster/postmaster.c:4490
#22 0x000055f11dbff9b0 in BackendStartup (port=0x55f1201907c0) at
/mnt/workspace/postgresql/build/../src/backend/postmaster/postmaster.c:4218
#23 0x000055f11dbfbc28 in ServerLoop () at /mnt/workspace/postgresql/build/../src/backend/postmaster/postmaster.c:1808
#24 0x000055f11dbfb3d9 in PostmasterMain (argc=3, argv=0x55f120166150) at
/mnt/workspace/postgresql/build/../src/backend/postmaster/postmaster.c:1480
#25 0x000055f11dabf5f5 in main (argc=3, argv=0x55f120166150) at
/mnt/workspace/postgresql/build/../src/backend/main/main.c:197


repalloc() can allocate at most 1 GB - 1 bytes, as defined by MaxAllocSize:

#define MaxAllocSize    ((Size) 0x3fffffff) /* 1 gigabyte - 1 */

#define AllocSizeIsValid(size)  ((Size) (size) <= MaxAllocSize)


[1]
https://www.postgresql.org/message-id/ME3P282MB16676ED32167189CB0462173B6D69%40ME3P282MB1667.AUSP282.PROD.OUTLOOK.COM

--
Regards,
Japin Li.
ChengDu WenWu Information Technology Co.,Ltd.



pgsql-bugs by date:

Previous
From: David Steele
Date:
Subject: Re: could not link file in wal restore lines
Next
From: Tom Lane
Date:
Subject: Re: Fwd: "SELECT COUNT(*) FROM" still causing issues (deadlock) in PostgreSQL 14.3/4?