Re: BUG #17557: ts_headline will error with "invalid memory alloc request size" for large documents

From: Japin Li <japinli(at)hotmail(dot)com>
To: magicagent(at)gmail(dot)com, pgsql-bugs(at)lists(dot)postgresql(dot)org
Subject: Re: BUG #17557: ts_headline will error with "invalid memory alloc request size" for large documents
Date: 2022-07-23 14:33:46
Message-ID: MEYP282MB16692B665807A13C58E84BB9B6939@MEYP282MB1669.AUSP282.PROD.OUTLOOK.COM
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-bugs


On Fri, 22 Jul 2022 at 23:39, PG Bug reporting form <noreply(at)postgresql(dot)org> wrote:
> The following bug has been logged on the website:
>
> Bug reference: 17557
> Logged by: Alex Malek
> Email address: magicagent(at)gmail(dot)com
> PostgreSQL version: 14.4
> Operating system: Red Hat
> Description:
>
> ts_headline when given a documents over a certain size/number of words will
> cause "ERROR: invalid memory alloc request size XXXXXX"
>
> # select ts_headline('b ' || repeat('1 ',16777215), $$'b'$$::tsquery,
> 'MaxWords=4, MinWords=3') ;
> ERROR: invalid memory alloc request size 1610612736
>
> Not just related to document size but also to number of "words" in a
> document:
>
> One less "word" works:
>
> select ts_headline('b ' || repeat('1 ',16777214), $$'b'$$::tsquery,
> 'MaxWords=4, MinWords=3') ;
> ts_headline
> ----------------
> <b>b</b> 1 1 1
> (1 row)
>
> Mem not an issue for larger "words" up to a point:
>
> # select ts_headline('b ' || repeat('123456789012345 ',16777214),
> $$'b'$$::tsquery, 'MaxWords=4, MinWords=3') ;
> ts_headline
> ----------------------------------------------------------
> <b>b</b> 123456789012345 123456789012345 123456789012345
> (1 row)
>
> # select ts_headline('b ' || repeat('1234567890123456 ',16777214),
> $$'b'$$::tsquery, 'MaxWords=4, MinWords=3') ;
> ERROR: invalid memory alloc request size 1140850564
>
> Memory issue appears to be triggered based on total number of words and word
> length
>
> # select ts_headline('b ' || repeat('1234567890123456 ',15790000),
> $$'b'$$::tsquery, 'MaxWords=4, MinWords=3') ;
> ts_headline
> -------------------------------------------------------------
> <b>b</b> 1234567890123456 1234567890123456 1234567890123456
> (1 row)
>
> # select ts_headline('b ' || repeat('1234567890123456 ',15795000),
> $$'b'$$::tsquery, 'MaxWords=4, MinWords=3') ;
> ERROR: invalid memory alloc request size 1074060012
>
>
> I get the same results even when increasing psql GUCs including work_mem,
> shared_buffers and effective_cache_size
> Also on machines w/ significantly more RAM, with and w/o HugePages enabled.

This appears to be a limitation of the repalloc() function [1]. Here is the backtrace.

#0 repalloc (pointer=0x7f3868312050, size=1610612736) at /mnt/workspace/postgresql/build/../src/backend/utils/mmgr/mcxt.c:1190
#1 0x000055f11dcee750 in hlfinditem (prs=0x7ffc5c5ac580, query=0x55f12016f408, pos=16777216, buf=0x55f160e51bf0 "1", buflen=1) at /mnt/workspace/postgresql/build/../src/backend/tsearch/ts_parse.c:462
#2 0x000055f11dcee9b4 in addHLParsedLex (prs=0x7ffc5c5ac580, query=0x55f12016f408, lexs=0x55f12023e9a0, norms=0x55f12023e928) at /mnt/workspace/postgresql/build/../src/backend/tsearch/ts_parse.c:505
#3 0x000055f11dceec2c in hlparsetext (cfgId=12360, prs=0x7ffc5c5ac580, query=0x55f12016f408,
buf=0x7f38ceb19054 "b 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 "..., buflen=33554432) at /mnt/workspace/postgresql/build/../src/backend/tsearch/ts_parse.c:582
#4 0x000055f11dcf1c2b in ts_headline_byid_opt (fcinfo=0x7ffc5c5ac610) at /mnt/workspace/postgresql/build/../src/backend/tsearch/wparser.c:319
#5 0x000055f11de9f57b in DirectFunctionCall4Coll (func=0x55f11dcf1a0b <ts_headline_byid_opt>, collation=0, arg1=12360, arg2=139881962639440, arg3=94494113854472, arg4=94494113855272)
at /mnt/workspace/postgresql/build/../src/backend/utils/fmgr/fmgr.c:861
#6 0x000055f11dcf1e02 in ts_headline_opt (fcinfo=0x55f12023cb48) at /mnt/workspace/postgresql/build/../src/backend/tsearch/wparser.c:366
#7 0x000055f11da28543 in ExecInterpExpr (state=0x55f12023ca60, econtext=0x55f12023c760, isnull=0x7ffc5c5ac96f) at /mnt/workspace/postgresql/build/../src/backend/executor/execExprInterp.c:763
#8 0x000055f11da2a6be in ExecInterpExprStillValid (state=0x55f12023ca60, econtext=0x55f12023c760, isNull=0x7ffc5c5ac96f) at /mnt/workspace/postgresql/build/../src/backend/executor/execExprInterp.c:1858
#9 0x000055f11da86710 in ExecEvalExprSwitchContext (state=0x55f12023ca60, econtext=0x55f12023c760, isNull=0x7ffc5c5ac96f) at /mnt/workspace/postgresql/build/../src/include/executor/executor.h:343
#10 0x000055f11da86788 in ExecProject (projInfo=0x55f12023ca58) at /mnt/workspace/postgresql/build/../src/include/executor/executor.h:377
#11 0x000055f11da869b9 in ExecResult (pstate=0x55f12023c648) at /mnt/workspace/postgresql/build/../src/backend/executor/nodeResult.c:136
#12 0x000055f11da41e17 in ExecProcNodeFirst (node=0x55f12023c648) at /mnt/workspace/postgresql/build/../src/backend/executor/execProcnode.c:463
#13 0x000055f11da356a1 in ExecProcNode (node=0x55f12023c648) at /mnt/workspace/postgresql/build/../src/include/executor/executor.h:259
#14 0x000055f11da38318 in ExecutePlan (estate=0x55f12023c410, planstate=0x55f12023c648, use_parallel_mode=false, operation=CMD_SELECT, sendTuples=true, numberTuples=0, direction=ForwardScanDirection,
dest=0x55f120237198, execute_once=true) at /mnt/workspace/postgresql/build/../src/backend/executor/execMain.c:1636
#15 0x000055f11da35d6b in standard_ExecutorRun (queryDesc=0x55f12018e800, direction=ForwardScanDirection, count=0, execute_once=true)
at /mnt/workspace/postgresql/build/../src/backend/executor/execMain.c:363
#16 0x000055f11da35b81 in ExecutorRun (queryDesc=0x55f12018e800, direction=ForwardScanDirection, count=0, execute_once=true) at /mnt/workspace/postgresql/build/../src/backend/executor/execMain.c:307
#17 0x000055f11dcda8d3 in PortalRunSelect (portal=0x55f1201da030, forward=true, count=0, dest=0x55f120237198) at /mnt/workspace/postgresql/build/../src/backend/tcop/pquery.c:924
#18 0x000055f11dcda50a in PortalRun (portal=0x55f1201da030, count=9223372036854775807, isTopLevel=true, run_once=true, dest=0x55f120237198, altdest=0x55f120237198, qc=0x7ffc5c5acd70)
at /mnt/workspace/postgresql/build/../src/backend/tcop/pquery.c:768
#19 0x000055f11dcd392c in exec_simple_query (query_string=0x55f12016d970 "select ts_headline('b ' || repeat('1 ',16777215), $$'b'$$::tsquery, 'MaxWords=4, MinWords=3') ;")
at /mnt/workspace/postgresql/build/../src/backend/tcop/postgres.c:1243
#20 0x000055f11dcd83ef in PostgresMain (dbname=0x55f120197ea8 "postgres", username=0x55f120197e88 "japin") at /mnt/workspace/postgresql/build/../src/backend/tcop/postgres.c:4505
#21 0x000055f11dc000c2 in BackendRun (port=0x55f1201907c0) at /mnt/workspace/postgresql/build/../src/backend/postmaster/postmaster.c:4490
#22 0x000055f11dbff9b0 in BackendStartup (port=0x55f1201907c0) at /mnt/workspace/postgresql/build/../src/backend/postmaster/postmaster.c:4218
#23 0x000055f11dbfbc28 in ServerLoop () at /mnt/workspace/postgresql/build/../src/backend/postmaster/postmaster.c:1808
#24 0x000055f11dbfb3d9 in PostmasterMain (argc=3, argv=0x55f120166150) at /mnt/workspace/postgresql/build/../src/backend/postmaster/postmaster.c:1480
#25 0x000055f11dabf5f5 in main (argc=3, argv=0x55f120166150) at /mnt/workspace/postgresql/build/../src/backend/main/main.c:197

repalloc() can allocate at most 1GB - 1 bytes (MaxAllocSize = 1073741823), so the request for 1610612736 bytes in frame #0 is rejected as invalid.

#define MaxAllocSize ((Size) 0x3fffffff) /* 1 gigabyte - 1 */

#define AllocSizeIsValid(size) ((Size) (size) <= MaxAllocSize)

[1] https://www.postgresql.org/message-id/ME3P282MB16676ED32167189CB0462173B6D69%40ME3P282MB1667.AUSP282.PROD.OUTLOOK.COM

--
Regards,
Japin Li.
ChengDu WenWu Information Technology Co.,Ltd.

In response to

Browse pgsql-bugs by date

  From Date Subject
Next Message Tom Lane 2022-07-23 15:33:55 Re: Fwd: "SELECT COUNT(*) FROM" still causing issues (deadlock) in PostgreSQL 14.3/4?
Previous Message David Steele 2022-07-23 13:20:32 Re: could not link file in wal restore lines