Re: pg11.1: dsa_area could not attach to segment - Mailing list pgsql-hackers

From Justin Pryzby
Subject Re: pg11.1: dsa_area could not attach to segment
Date
Msg-id 20190205163509.GM29720@telsasoft.com
Whole thread Raw
In response to pg11.1: dsa_area could not attach to segment  (Justin Pryzby <pryzby@telsasoft.com>)
Responses Re: pg11.1: dsa_area could not attach to segment
List pgsql-hackers
I finally reproduced this and captured a core dump.

For some reason I needed to use assert() rather than elog(PANIC); otherwise
it failed with ERROR and produced no core dump.

@@ -1741,4 +1743,5 @@ get_segment_by_index(dsa_area *area, dsa_segment_index index)
                segment = dsm_attach(handle);
+               assert (segment != NULL);
                if (segment == NULL)
-                       elog(ERROR, "dsa_area could not attach to segment");
+                       elog(PANIC, "dsa_area could not attach to segment");
                if (area->mapping_pinned)

On Mon, Dec 03, 2018 at 11:45:00AM +1300, Thomas Munro wrote:
> If anyone can reproduce this problem with a debugger, it'd be
> interesting to see the output of dsa_dump(area), and
> FreePageManagerDump(segment_map->fpm).

Looks like this will take some work; is it OK if I make a core dump available to
you?  I'm not sure how sensitive it is to recompilation, but I'm using PG11.1
compiled locally on CentOS 6.

/var/log/postgresql/postgresql-2019-02-05_111730.log-< 2019-02-05 11:17:31.372 EST  >LOG:  background worker "parallel
worker" (PID 17110) was terminated by signal 6: Aborted
 
/var/log/postgresql/postgresql-2019-02-05_111730.log:< 2019-02-05 11:17:31.372 EST  >DETAIL:  Failed process was
running: SELECT colcld.child c, parent p, array_agg(colpar.attname::text ORDER BY colpar.attnum) cols,
array_agg(format_type(colpar.atttypid,colpar.atttypmod) ORDER BY colpar.attnum) AS types FROM queued_alters qa JOIN
pg_attribute colpar ON to_regclass(qa.parent)=colpar.attrelid AND colpar.attnum>0 AND NOT colpar.attisdropped JOIN
(SELECT *, attrelid::regclass::text AS child FROM pg_attribute) colcld ON to_regclass(qa.child) =colcld.attrelid AND
colcld.attnum>0 AND NOT colcld.attisdropped WHERE colcld.attname=colpar.attname AND colpar.atttypid!=colcld.atttypid
GROUP BY 1,2 ORDER BY parent LIKE 'unused%', regexp_replace(colcld.child,
'.*_((([0-9]{4}_[0-9]{2})_[0-9]{2})|(([0-9]{6})([0-9]{2})?))$','\3\5') DESC, regexp_replace(colcld.child, '.*_', '')
DESC LIMIT 1
 

(gdb) bt
#0  0x00000037b9c32495 in raise () from /lib64/libc.so.6
#1  0x00000037b9c33c75 in abort () from /lib64/libc.so.6
#2  0x00000037b9c2b60e in __assert_fail_base () from /lib64/libc.so.6
#3  0x00000037b9c2b6d0 in __assert_fail () from /lib64/libc.so.6
#4  0x00000000008c4a72 in get_segment_by_index (area=0x2788440, index=<value optimized out>) at dsa.c:1744
#5  0x00000000008c58e9 in get_best_segment (area=0x2788440, npages=8) at dsa.c:1995
#6  0x00000000008c6c99 in dsa_allocate_extended (area=0x2788440, size=32768, flags=0) at dsa.c:703
#7  0x000000000064c6fe in ExecParallelHashTupleAlloc (hashtable=0x27affb0, size=104, shared=0x7ffc6b5cfc48) at
nodeHash.c:2837
#8  0x000000000064cb92 in ExecParallelHashTableInsert (hashtable=0x27affb0, slot=<value optimized out>,
hashvalue=423104953) at nodeHash.c:1693
 
#9  0x000000000064cf17 in MultiExecParallelHash (node=0x27a1ed8) at nodeHash.c:288
#10 MultiExecHash (node=0x27a1ed8) at nodeHash.c:112
#11 0x000000000064e1f8 in ExecHashJoinImpl (pstate=0x2793038) at nodeHashjoin.c:290
#12 ExecParallelHashJoin (pstate=0x2793038) at nodeHashjoin.c:581
#13 0x0000000000638ce0 in ExecProcNodeInstr (node=0x2793038) at execProcnode.c:461
#14 0x00000000006349c7 in ExecProcNode (queryDesc=0x2782cd0, direction=<value optimized out>, count=0, execute_once=56)
at ../../../src/include/executor/executor.h:237
 
#15 ExecutePlan (queryDesc=0x2782cd0, direction=<value optimized out>, count=0, execute_once=56) at execMain.c:1723
#16 standard_ExecutorRun (queryDesc=0x2782cd0, direction=<value optimized out>, count=0, execute_once=56) at
execMain.c:364
#17 0x00007f84a97c8618 in pgss_ExecutorRun (queryDesc=0x2782cd0, direction=ForwardScanDirection, count=0,
execute_once=true) at pg_stat_statements.c:892
 
#18 0x00007f84a93357dd in explain_ExecutorRun (queryDesc=0x2782cd0, direction=ForwardScanDirection, count=0,
execute_once=true) at auto_explain.c:268
 
#19 0x0000000000635071 in ParallelQueryMain (seg=0x268fba8, toc=0x7f84a9578000) at execParallel.c:1402
#20 0x0000000000508f34 in ParallelWorkerMain (main_arg=<value optimized out>) at parallel.c:1409
#21 0x0000000000704760 in StartBackgroundWorker () at bgworker.c:834
#22 0x000000000070e11c in do_start_bgworker () at postmaster.c:5698
#23 maybe_start_bgworkers () at postmaster.c:5911
#24 0x0000000000710786 in sigusr1_handler (postgres_signal_arg=<value optimized out>) at postmaster.c:5091
#25 <signal handler called>
#26 0x00000037b9ce1603 in __select_nocancel () from /lib64/libc.so.6
#27 0x000000000071300e in ServerLoop (argc=<value optimized out>, argv=<value optimized out>) at postmaster.c:1670
#28 PostmasterMain (argc=<value optimized out>, argv=<value optimized out>) at postmaster.c:1379
#29 0x000000000067e8c0 in main (argc=3, argv=0x265f960) at main.c:228

#0  0x00000037b9c32495 in raise () from /lib64/libc.so.6
No symbol table info available.
#1  0x00000037b9c33c75 in abort () from /lib64/libc.so.6
No symbol table info available.
#2  0x00000037b9c2b60e in __assert_fail_base () from /lib64/libc.so.6
No symbol table info available.
#3  0x00000037b9c2b6d0 in __assert_fail () from /lib64/libc.so.6
No symbol table info available.
#4  0x00000000008c4a72 in get_segment_by_index (area=0x2788440, index=<value optimized out>) at dsa.c:1744
        handle = <value optimized out>
        segment = 0x0
        segment_map = <value optimized out>
        __func__ = "get_segment_by_index"
        __PRETTY_FUNCTION__ = "get_segment_by_index"
#5  0x00000000008c58e9 in get_best_segment (area=0x2788440, npages=8) at dsa.c:1995
        segment_map = <value optimized out>
        next_segment_index = <value optimized out>
        contiguous_pages = <value optimized out>
        threshold = 512
        segment_index = 10
        bin = <value optimized out>
#6  0x00000000008c6c99 in dsa_allocate_extended (area=0x2788440, size=32768, flags=0) at dsa.c:703
        npages = 8
        first_page = <value optimized out>
        span_pointer = 8796097199728
        pool = 0x7f84a9579730
        size_class = <value optimized out>
        start_pointer = <value optimized out>
        segment_map = <value optimized out>
        result = 140207753496128
        __func__ = "dsa_allocate_extended"
        __PRETTY_FUNCTION__ = "dsa_allocate_extended"
#7  0x000000000064c6fe in ExecParallelHashTupleAlloc (hashtable=0x27affb0, size=104, shared=0x7ffc6b5cfc48) at
nodeHash.c:2837
        pstate = 0x7f84a9578540
        chunk_shared = <value optimized out>
        chunk = <value optimized out>
        chunk_size = 32768
        result = <value optimized out>
        curbatch = 0
#8  0x000000000064cb92 in ExecParallelHashTableInsert (hashtable=0x27affb0, slot=<value optimized out>,
hashvalue=423104953) at nodeHash.c:1693
 
        hashTuple = <value optimized out>
        tuple = 0x27b00c8
        shared = <value optimized out>
        bucketno = 1577401
        batchno = 0
#9  0x000000000064cf17 in MultiExecParallelHash (node=0x27a1ed8) at nodeHash.c:288
        outerNode = 0x27a1ff0
        hashkeys = 0x27af110
        slot = 0x27a3d70
        econtext = 0x27a3798
        hashvalue = 423104953
        i = <value optimized out>
        pstate = 0x7f84a9578540
        hashtable = 0x27affb0
        build_barrier = 0x7f84a9578590
#10 MultiExecHash (node=0x27a1ed8) at nodeHash.c:112
No locals.
#11 0x000000000064e1f8 in ExecHashJoinImpl (pstate=0x2793038) at nodeHashjoin.c:290
        outerNode = 0x2792f20
        hashNode = 0x27a1ed8
        econtext = 0x2792c68
        outerTupleSlot = 0x1
        node = 0x2793038
        joinqual = 0x27ac270
        otherqual = 0x0
        hashtable = 0x27affb0
        hashvalue = 0
        batchno = 41493896
        parallel_state = 0x7f84a9578540
#12 ExecParallelHashJoin (pstate=0x2793038) at nodeHashjoin.c:581
No locals.

Justin


pgsql-hackers by date:

Previous
From: Tom Lane
Date:
Subject: Re: Fix optimization of foreign-key on update actions
Next
From: Bruce Momjian
Date:
Subject: Re: Commit Fest 2019-01 is now closed