BUG #19416: Backend SIGSEGV in ExecShutdownHashJoin/ExecHashTableDetach/dsa_free - Mailing list pgsql-bugs

From PG Bug reporting form
Subject BUG #19416: Backend SIGSEGV in ExecShutdownHashJoin/ExecHashTableDetach/dsa_free
Date
Msg-id 19416-967b71bb54d8fe9b@postgresql.org
Whole thread Raw
List pgsql-bugs
The following bug has been logged on the website:

Bug reference:      19416
Logged by:          Chris Hofstaedtler
Email address:      ch+pg@zeha.at
PostgreSQL version: 18.2
Operating system:   Debian 12 on amd64, glibc 2.36-9+deb12u13
Description:

The affected database was recently pg_upgrade'd from 17.x (exact stable
release TBD). On some large queries using parallel workers we now see
crashing backends.
While we have identified a query that seems to trigger the SEGV, it does not
always crash.
Note that we run with auto_explain and pg_stat_statements enabled, although
I don't see them being involved in the backtrace below.

We will try to collect further info, but that's all we have for the moment.

Example gdb bt full from a coredump:

Program terminated with signal SIGSEGV, Segmentation fault.

warning: Unexpected size of section `.reg-xstate/1181045' in core file.
#0  0x0000564830c3c9d1 in dsa_free (area=0x5648621a9280, dp=<optimized out>)
at ./build/../src/backend/utils/mmgr/dsa.c:845
845     ./build/../src/backend/utils/mmgr/dsa.c: No such file or directory.
(gdb) bt full
#0  0x0000564830c3c9d1 in dsa_free (area=0x5648621a9280, dp=<optimized out>)
at ./build/../src/backend/utils/mmgr/dsa.c:845
        segment_map = 0x5648621a92b8
        pageno = 0
        span_pointer = 0
        span = <optimized out>
        superblock = <optimized out>
        object = <optimized out>
        size = <optimized out>
        size_class = <optimized out>
#1  0x0000564830917ae1 in ExecHashTableDetach (hashtable=0x564862147fa0) at
./build/../src/backend/executor/nodeHash.c:3439
        i = <optimized out>
        pstate = 0x7ef9970b5040
#2  0x000056483091afee in ExecShutdownHashJoin
(node=node@entry=0x56486203ab20) at
./build/../src/backend/executor/nodeHashjoin.c:1593
No locals.
#3  0x00005648308ff8f8 in ExecShutdownNode_walker (node=0x56486203ab20,
context=0x0) at ./build/../src/backend/executor/execProcnode.c:818
No locals.
#4  0x00005648308ff92e in ExecShutdownNode_walker (node=<optimized out>,
context=<optimized out>) at
./build/../src/backend/executor/execProcnode.c:779
No locals.
#5  0x000056483096caad in planstate_tree_walker_impl
(planstate=planstate@entry=0x56486203a810,
walker=walker@entry=0x5648308ff920 <ExecShutdownNode_walker>,
context=context@entry=0x0)
    at ./build/../src/backend/nodes/nodeFuncs.c:4739
        plan = 0x7ef9981e4440
        lc = <optimized out>
#6  0x00005648308ff873 in ExecShutdownNode_walker (node=0x56486203a810,
context=0x0) at ./build/../src/backend/executor/execProcnode.c:798
No locals.
#7  0x00005648308ff92e in ExecShutdownNode_walker (node=<optimized out>,
context=<optimized out>) at
./build/../src/backend/executor/execProcnode.c:779
No locals.
#8  0x000056483096caad in planstate_tree_walker_impl
(planstate=planstate@entry=0x56486203a608,
walker=walker@entry=0x5648308ff920 <ExecShutdownNode_walker>,
context=context@entry=0x0)
    at ./build/../src/backend/nodes/nodeFuncs.c:4739
        plan = 0x7ef9981e4588
        lc = <optimized out>
#9  0x00005648308ff873 in ExecShutdownNode_walker (node=0x56486203a608,
context=0x0) at ./build/../src/backend/executor/execProcnode.c:798
No locals.
#10 0x00005648308ff92e in ExecShutdownNode_walker (node=<optimized out>,
context=<optimized out>) at
./build/../src/backend/executor/execProcnode.c:779
No locals.
#11 0x000056483096caad in planstate_tree_walker_impl
(planstate=planstate@entry=0x56486203a270,
walker=walker@entry=0x5648308ff920 <ExecShutdownNode_walker>,
context=context@entry=0x0)
    at ./build/../src/backend/nodes/nodeFuncs.c:4739
        plan = 0x7ef9981e4690
        lc = <optimized out>
#12 0x00005648308ff873 in ExecShutdownNode_walker (node=0x56486203a270,
context=0x0) at ./build/../src/backend/executor/execProcnode.c:798
No locals.
#13 0x00005648308ff92e in ExecShutdownNode_walker (node=<optimized out>,
context=<optimized out>) at
./build/../src/backend/executor/execProcnode.c:779
No locals.
#14 0x000056483096caad in planstate_tree_walker_impl
(planstate=planstate@entry=0x564862039b40,
walker=walker@entry=0x5648308ff920 <ExecShutdownNode_walker>,
context=context@entry=0x0)
    at ./build/../src/backend/nodes/nodeFuncs.c:4739
        plan = 0x7ef9981e4d70
        lc = <optimized out>
#15 0x00005648308ff873 in ExecShutdownNode_walker (node=0x564862039b40,
context=0x0) at ./build/../src/backend/executor/execProcnode.c:798
No locals.
#16 0x00005648308ff92e in ExecShutdownNode_walker (node=<optimized out>,
context=<optimized out>) at
./build/../src/backend/executor/execProcnode.c:779
No locals.
#17 0x000056483096caad in planstate_tree_walker_impl
(planstate=planstate@entry=0x564862039930,
walker=walker@entry=0x5648308ff920 <ExecShutdownNode_walker>,
context=context@entry=0x0)
    at ./build/../src/backend/nodes/nodeFuncs.c:4739
        plan = 0x7ef9981e6ff0
        lc = <optimized out>
#18 0x00005648308ff873 in ExecShutdownNode_walker (node=0x564862039930,
context=0x0) at ./build/../src/backend/executor/execProcnode.c:798
No locals.
#19 0x00005648308ff92e in ExecShutdownNode_walker (node=<optimized out>,
context=<optimized out>) at
./build/../src/backend/executor/execProcnode.c:779
No locals.
#20 0x000056483096caad in planstate_tree_walker_impl
(planstate=planstate@entry=0x5648620396d8,
walker=walker@entry=0x5648308ff920 <ExecShutdownNode_walker>,
context=context@entry=0x0) at ./build/../src/backend/nodes/nodeFuncs.c:4739
        plan = 0x7ef9981e7e40
        lc = <optimized out>
#21 0x00005648308ff873 in ExecShutdownNode_walker
(node=node@entry=0x5648620396d8, context=context@entry=0x0) at
./build/../src/backend/executor/execProcnode.c:798
No locals.
#22 0x00005648309001ac in ExecShutdownNode_walker (context=0x0,
node=node@entry=0x5648620396d8) at
./build/../src/backend/executor/execProcnode.c:780
No locals.
#23 0x00005648308f8378 in ExecutePlan (dest=0x7ef9981f9508,
direction=<optimized out>, numberTuples=0, sendTuples=true,
operation=CMD_SELECT, queryDesc=0x56486202ce18) at
./build/../src/backend/executor/execMain.c:1769
        estate = 0x564862039328
        use_parallel_mode = <optimized out>
        slot = <optimized out>
        planstate = 0x5648620396d8
        current_tuple_count = 1116644
        estate = <optimized out>
        planstate = <optimized out>
        use_parallel_mode = <optimized out>
        slot = <optimized out>
        current_tuple_count = <optimized out>
#24 standard_ExecutorRun (queryDesc=0x56486202ce18, direction=<optimized
out>, count=0) at ./build/../src/backend/executor/execMain.c:366
        estate = 0x564862039328
        operation = CMD_SELECT
        dest = 0x7ef9981f9508
        sendTuples = <optimized out>
        oldcontext = 0x56486202cd20
#25 0x00007f13277f84f5 in pgss_ExecutorRun (queryDesc=0x56486202ce18,
direction=ForwardScanDirection, count=0) at
./build/../contrib/pg_stat_statements/pg_stat_statements.c:1035
        _save_exception_stack = 0x7ffe82f6a9d0
        _save_context_stack = 0x0
        _local_sigjmp_buf = {{__jmpbuf = {94868880437496,
-6331151954150227466, 0, 94868881985048, 0, 1, -6331151954116673034,
-6199079795440942602}, __mask_was_saved = 0, __saved_mask = {__val = {1, 0,
0, 0, 0, 0, 0, 0, 0, 139610464091712, 10278822671576958720, 94868059062592,
94868882036440,
                139610464091712, 94868055788898, 94868059062592}}}}
        _do_rethrow = false
#26 0x00007f1329304713 in explain_ExecutorRun (queryDesc=0x56486202ce18,
direction=ForwardScanDirection, count=0) at
./build/../contrib/auto_explain/auto_explain.c:335
        _save_exception_stack = 0x7ffe82f6ab70
        _save_context_stack = 0x0
        _local_sigjmp_buf = {{__jmpbuf = {94868880437496,
-6331151954051661322, 0, 94868881985048, 0, 1, -6331151954152324618,
-6199084650813390346}, __mask_was_saved = 0, __saved_mask = {__val =
{94868879876880, 6492170816, 94868055788898, 139610464091712, 0,
140731095624304, 94868052416602,
                94868881985048, 94868059431168, 94868881984800,
139610464157392, 140731095624352, 94868055859412, 139719870717952,
94868880437496, 0}}}}
        _do_rethrow = false
#27 0x0000564830ac8eb7 in PortalRunSelect
(portal=portal@entry=0x564861eb30f8, forward=forward@entry=true, count=0,
count@entry=9223372036854775807, dest=dest@entry=0x7ef9981f9508) at
./build/../src/backend/tcop/pquery.c:921
        queryDesc = 0x56486202ce18
        direction = <optimized out>
        nprocessed = <optimized out>
        __func__ = "PortalRunSelect"
#28 0x0000564830aca31b in PortalRun (portal=portal@entry=0x564861eb30f8,
count=count@entry=9223372036854775807, isTopLevel=isTopLevel@entry=true,
dest=dest@entry=0x7ef9981f9508, altdest=altdest@entry=0x7ef9981f9508,
qc=qc@entry=0x7ffe82f6acb0) at ./build/../src/backend/tcop/pquery.c:765
        _save_exception_stack = 0x7ffe82f6af90
        _save_context_stack = 0x0
        _local_sigjmp_buf = {{__jmpbuf = {94868880437496,
-6331151954007621130, 94868879876840, 140731095624880, 139610464163080,
94868879876880, -6331151954097798666, -338229140967718410}, __mask_was_saved
= 0, __saved_mask = {__val = {0, 94868057784462, 64, 94867237634049,
94868880449032,
                140731095624720, 94868880437496, 94868057784462, 1,
139610464163008, 94868879876880, 140731095624784, 94868055788898,
140731095624784, 2, 140731095624784}}}}
        _do_rethrow = <optimized out>
        result = <optimized out>
        nprocessed = <optimized out>
        saveTopTransactionResourceOwner = 0x564861e75740
        saveTopTransactionContext = 0x564861ec1040
        saveActivePortal = 0x0
        saveResourceOwner = 0x564861e75740
        savePortalContext = 0x0
        saveMemoryContext = 0x564861ec1040
        __func__ = "PortalRun"
#29 0x0000564830ac62b9 in exec_simple_query (
    query_string=0x564861dc9b58 "SELECT\n", ' ' <repeats 24 times>,
"vw_network_settings.subscriber_id AS subscriber_id,\n", ' ' <repeats 24
times>, "json_build_object(\n", ' ' <repeats 28 times>, "'msisdn',
msisdn.msisdn::text,\n", ' ' <repeats 15 times>...)
    at ./build/../src/backend/tcop/postgres.c:1273
        cmdtaglen = 6
        snapshot_set = <optimized out>
        per_parsetree_context = 0x0
        plantree_list = <optimized out>
        parsetree = 0x564861e2a2e8
        commandTag = <optimized out>
        qc = {commandTag = CMDTAG_UNKNOWN, nprocessed = 0}
        querytree_list = <optimized out>
        portal = 0x564861eb30f8
        receiver = 0x7ef9981f9508
        format = 0
        cmdtagname = <optimized out>
        parsetree_item__state = {l = 0x564861e2a310, i = <optimized out>}
        dest = DestRemote
        oldcontext = 0x564861ec1040
        parsetree_list = 0x564861e2a310
        parsetree_item = <optimized out>
        save_log_statement_stats = false
        was_logged = false
        use_implicit_block = false
        msec_str = "\000
\000\000\000\000\000\000\265W\367\202\376\177\000\000Q\000\000\000\000\000\000\000\360\256\366\202\376\177\000"
        __func__ = "exec_simple_query"
#30 0x0000564830ac7f5c in PostgresMain (dbname=<optimized out>,
username=<optimized out>) at ./build/../src/backend/tcop/postgres.c:4766
        query_string = 0x564861dc9b58 "SELECT\n", ' ' <repeats 24 times>,
"vw_network_settings.subscriber_id AS subscriber_id,\n", ' ' <repeats 24
times>, "json_build_object(\n", ' ' <repeats 28 times>, "'msisdn',
msisdn.msisdn::text,\n", ' ' <repeats 15 times>...
        firstchar = <optimized out>
        input_message = {data = 0x564861dc9b58 "SELECT\n", ' ' <repeats 24
times>, "vw_network_settings.subscriber_id AS subscriber_id,\n", ' '
<repeats 24 times>, "json_build_object(\n", ' ' <repeats 28 times>,
"'msisdn', msisdn.msisdn::text,\n", ' ' <repeats 15 times>..., len = 1089,
          maxlen = 2048, cursor = 1089}
        local_sigjmp_buf = {{__jmpbuf = {140731095625424,
-6331151954970213898, 1963266560, 4, 3, 1, -6331151954036981258,
-338229142919249418}, __mask_was_saved = 1, __saved_mask = {__val =
{4194304, 1, 94868059579616, 16, 0, 0, 139720252948637, 0, 94868880165616,
2047, 139720252948109,
                94868055973200, 5008, 1610, 18446744073709551368,
94868879211248}}}}
        send_ready_for_query = false
        idle_in_transaction_timeout_enabled = false
        idle_session_timeout_enabled = false
        __func__ = "PostgresMain"
#31 0x0000564830ac281f in BackendMain (startup_data=<optimized out>,
startup_data_len=<optimized out>) at
./build/../src/backend/tcop/backend_startup.c:124
        bsdata = <optimized out>
#32 0x0000564830a24395 in postmaster_child_launch (child_type=B_BACKEND,
child_slot=3, startup_data=startup_data@entry=0x7ffe82f6b1a0,
startup_data_len=startup_data_len@entry=24,
client_sock=client_sock@entry=0x7ffe82f6b1c0) at
./build/../src/backend/postmaster/launch_backend.c:290
        pid = <optimized out>
#33 0x0000564830a27f1a in BackendStartup (client_sock=0x7ffe82f6b1c0) at
./build/../src/backend/postmaster/postmaster.c:3587
        bn = 0x564861e28a10
        pid = <optimized out>
        startup_data = {canAcceptConnections = CAC_OK, socket_created =
825351973175949, fork_started = 825351973175949}
        cac = <optimized out>
        bn = <optimized out>
        pid = <optimized out>
        startup_data = <optimized out>
        cac = <optimized out>
        __func__ = "BackendStartup"
        __errno_location = <optimized out>
        save_errno = <optimized out>
        __errno_location = <optimized out>
        __errno_location = <optimized out>
#34 ServerLoop () at ./build/../src/backend/postmaster/postmaster.c:1702
        s = {sock = 10, raddr = {addr = {ss_family = 1,
              __ss_padding =

"\220\030\177\000\000\001\000\000\000\000\000\000\000\000\022z\3400HV\000\000\000\000\000\000\000\000\000\0000\262\366\202\376\177\000\000
e\326aHV\000\000\005\000\000\000\376\177\000\000p\\\332aHV\000\000ۡ\3010HV",
'\000' <repeats 18 times>,
"P\262\366\202\376\177\000\000\022z\3400HV\000\000\370\233\330aHV\000\000\000\207\223\240¶\245\216",
__ss_align = 94867237634048}, salen = 2}}
        i = 0
        now = <optimized out>
        last_lockfile_recheck_time = 1772036751
        last_touch_time = 1772035191
        events = {{pos = 2, events = 2, fd = 8, user_data = 0x0}, {pos = 0,
events = 0, fd = 8, user_data = 0x0}, {pos = 1641585408, events = 22088, fd
= 0, user_data = 0x7f13281f4a80}, {pos = 0, events = 0, fd = 1641585656,
user_data = 0x4000000007d}, {pos = 0, events = 32531, fd = 819606780,
            user_data = 0x0}, {pos = -1600944384, events = 2393224898, fd =
1641440544, user_data = 0xfffffffffffffed0}, {pos = 2, events = 0, fd =
1642224304, user_data = 0x7ffe82f6b368}, {pos = 805639072, events = 0, fd =
0, user_data = 0xffffffff}, {pos = -2097761432, events = 32766, fd = -304,
            user_data = 0x7ffe82f6b377}, {pos = 0, events = 0, fd =
1642338164, user_data = 0x7f1328147f27}, {pos = 8, events = 0, fd =
-2097761332, user_data = 0x7ffe82f6b3d0}, {pos = -2097761420, events =
32766, fd = 1642264321, user_data = 0x564861e41777}, {pos = 1642255384,
events = 6662336,
            fd = -1600944384, user_data = 0x7ffe82f6b39c}, {pos =
-2097760972, events = 32766, fd = -2097759408, user_data = 0x7ffe82f6b534},
{pos = -2097760992, events = 32766, fd = 1642338152, user_data =
0x564861e41777}, {pos = 672432151, events = 32531, fd = 0, user_data =
0xa065a8c000000000},
          {pos = -2097760880, events = 32766, fd = -2097759408, user_data =
0x564861e25638}, {pos = 672205612, events = 32531, fd = 1642264542,
user_data = 0x564830c6ebb9 <dostr+121>}, {pos = 669112656, events = 32531,
fd = 819699590, user_data = 0x7ffe82f6bb40}, {pos = -2097759304,
            events = 32766, fd = -2097759648, user_data = 0x564830c6f287
<dopr+455>}, {pos = -2097760912, events = 32766, fd = 820083200, user_data =
0x7ffe82f6b610}, {pos = 1642338152, events = 22088, fd = 1, user_data =
0x10}, {pos = 2, events = 32531, fd = 64, user_data = 0x7ffe82f6bab0}, {
            pos = 818344291, events = 22088, fd = 1642218528, user_data =
0x0}, {pos = 0, events = 0, fd = 1835348, user_data = 0x1}, {pos = 33188,
events = 0, fd = -2097760992, user_data = 0x0}, {pos = 0, events = 0, fd =
0, user_data = 0x8}, {pos = 1771887901, events = 0, fd = 743639855,
            user_data = 0x68acb549}, {pos = 0, events = 0, fd = 1763728652,
user_data = 0xb358547}, {pos = 0, events = 0, fd = 0, user_data = 0x0}, {pos
= 0, events = 0, fd = 0, user_data = 0xa065a8c000000002}, {pos = 0, events =
0, fd = 0, user_data = 0x0}, {pos = 0, events = 0, fd = 0,
            user_data = 0x0}, {pos = 0, events = 0, fd = 1, user_data =
0x100000000}, {pos = 2, events = 17, fd = 0, user_data = 0x3}, {pos = 0,
events = 1, fd = 0, user_data = 0x0}, {pos = 0, events = 0, fd = 0,
user_data = 0x0}, {pos = 0, events = 0, fd = 0, user_data = 0x0}, {pos = 0,
            events = 0, fd = 0, user_data = 0x0}, {pos = 0, events = 0, fd =
-2097760752, user_data = 0x400}, {pos = 0, events = 0, fd = 1771972544,
user_data = 0x2fa448b5}, {pos = 1771972544, events = 0, fd = 799295669,
user_data = 0x699e27c0}, {pos = 799295669, events = 0, fd = 0,
            user_data = 0x0}, {pos = 0, events = 0, fd = 673140088,
user_data = 0x8ea5b6c2a0938700}, {pos = 1641572464, events = 22088, fd =
671842461, user_data = 0x8}, {pos = 1642266192, events = 22088, fd = 2050,
user_data = 0x564861d86870}, {pos = 8, events = 0, fd = 673135712,
            user_data = 0x802}, {pos = -248, events = 4294967295, fd = 17,
user_data = 0x564830c8a273}, {pos = 819779900, events = 22088, fd =
671845050, user_data = 0x7ffe82f6b730}, {pos = 818348887, events = 22088, fd
= 32768, user_data = 0x9}, {pos = -2097760208, events = 32766, fd =
672071077,
            user_data = 0x7f0000000000}, {pos = 9, events = 0, fd =
-2097760464, user_data = 0x7f13280efe12}, {pos = 1642266176, events = 22088,
fd = 673135712, user_data = 0x201c0}, {pos = 671836461, events = 32531, fd =
900, user_data = 0x0}, {pos = 4096, events = 1, fd = 4096, user_data = 0x8},
          {pos = -1600944384, events = 2393224898, fd = 563218977, user_data
= 0xfffffffffffffed0}, {pos = 0, events = 0, fd = -2097760208, user_data =
0x564861e2fe50}, {pos = 818455155, events = 22088, fd = 819779900, user_data
= 0x7f13280b8f4f <free+111>}, {pos = 1642266192, events = 22088,
            fd = -2097760240, user_data = 0xf}, {pos = 9, events = 0, fd =
-2097760240, user_data = 0x7f13280efedd <closedir+13>}, {pos = 1641853536,
events = 22088, fd = 816397001, user_data = 0x564861e2fe50}, {pos =
818455260, events = 22088, fd = -2097759088,
            user_data = 0x564830a96fb8 <RemovePgTempFiles+312>}, {pos =
-2097760160, events = 32766, fd = 671841933, user_data =
0x7367702f65736162}, {pos = 26672, events = 0, fd = 255, user_data =
0x7ffe00000063}}
        nevents = <optimized out>
        __func__ = "ServerLoop"
#35 0x0000564830a298bd in PostmasterMain (argc=argc@entry=17,
argv=argv@entry=0x564861d87890) at
./build/../src/backend/postmaster/postmaster.c:1400
        opt = <optimized out>
        status = <optimized out>
        userDoption = <optimized out>
        listen_addr_saved = <optimized out>
        output_config_variable = <optimized out>
        __func__ = "PostmasterMain"
#36 0x000056483072dbdc in main (argc=17, argv=0x564861d87890) at
./build/../src/backend/main/main.c:227
        do_check_root = <optimized out>
        dispatch_option = <optimized out>





pgsql-bugs by date:

Previous
From: Álvaro Herrera
Date:
Subject: Re: [BUG] Assert failure in ReorderBufferReturnTXN during logical decoding due to leaked specinsert change
Next
From: Robert Haas
Date:
Subject: Re: Major Version Upgrade failure due to orphan roles entries in catalog