pg11.5: ExecHashJoinNewBatch: glibc detected...double free orcorruption (!prev) - Mailing list pgsql-hackers

From Justin Pryzby
Subject pg11.5: ExecHashJoinNewBatch: glibc detected...double free orcorruption (!prev)
Date
Msg-id 20190825022501.GV15332@telsasoft.com
Whole thread Raw
Responses Re: pg11.5: ExecHashJoinNewBatch: glibc detected...double free orcorruption (!prev)
List pgsql-hackers
Core was generated by `postgres: telsasoft ts 10.100.2.162(33500) SELECT     '.
Program terminated with signal 6, Aborted.
#0  0x00000039ff6325e5 in raise (sig=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:64
64        return INLINE_SYSCALL (tgkill, 3, pid, selftid, sig);
Missing separate debuginfos, use: debuginfo-install audit-libs-2.4.5-3.el6.x86_64
cyrus-sasl-lib-2.1.23-15.el6_6.2.x86_64geos36-3.6.3-1.rhel6.1.x86_64 json-c-0.11-12.el6.x86_64
keyutils-libs-1.4-5.el6.x86_64libcom_err-1.41.12-22.el6.x86_64 libgcc-4.4.7-17.el6.x86_64 libicu-4.2.1-14.el6.x86_64
libselinux-2.0.94-7.el6.x86_64libstdc++-4.4.7-17.el6.x86_64 nspr-4.11.0-1.el6.x86_64 nss-3.21.0-8.el6.x86_64
nss-softokn-freebl-3.14.3-23.3.el6_8.x86_64nss-util-3.21.0-2.el6.x86_64 postgis24_11-2.4.5-1.rhel6.1.x86_64
proj49-4.9.3-3.rhel6.1.x86_64zlib-1.2.3-29.el6.x86_64
 
(gdb) bt
#0  0x00000039ff6325e5 in raise (sig=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:64
#1  0x00000039ff633dc5 in abort () at abort.c:92
#2  0x00000039ff6704f7 in __libc_message (do_abort=2, fmt=0x39ff758a60 "*** glibc detected *** %s: %s: 0x%s ***\n") at
../sysdeps/unix/sysv/linux/libc_fatal.c:198
#3  0x00000039ff675f3e in malloc_printerr (action=3, str=0x39ff758df0 "double free or corruption (!prev)", ptr=<value
optimizedout>, ar_ptr=<value optimized out>) at malloc.c:6360
 
#4  0x00000039ff678dd0 in _int_free (av=0x39ff98e120, p=0x1d40b090, have_lock=0) at malloc.c:4846
#5  0x00000000006269e5 in ExecHashJoinNewBatch (pstate=0x2771218) at nodeHashjoin.c:1058
#6  ExecHashJoinImpl (pstate=0x2771218) at nodeHashjoin.c:539
#7  ExecHashJoin (pstate=0x2771218) at nodeHashjoin.c:565
#8  0x00000000006131d0 in ExecProcNodeInstr (node=0x2771218) at execProcnode.c:461
#9  0x0000000000633286 in ExecProcNode (pstate=0x2771108) at ../../../src/include/executor/executor.h:247
#10 ExecSort (pstate=0x2771108) at nodeSort.c:107
#11 0x00000000006131d0 in ExecProcNodeInstr (node=0x2771108) at execProcnode.c:461
#12 0x000000000061d51e in ExecProcNode (aggstate=0x2770d30) at ../../../src/include/executor/executor.h:247
#13 fetch_input_tuple (aggstate=0x2770d30) at nodeAgg.c:406
#14 0x000000000061ee70 in agg_retrieve_direct (pstate=0x2770d30) at nodeAgg.c:1755
#15 ExecAgg (pstate=0x2770d30) at nodeAgg.c:1570
#16 0x00000000006131d0 in ExecProcNodeInstr (node=0x2770d30) at execProcnode.c:461
#17 0x000000000060f4b7 in ExecProcNode (queryDesc=0x2940950, direction=<value optimized out>, count=0, execute_once=48)
at../../../src/include/executor/executor.h:247
 
#18 ExecutePlan (queryDesc=0x2940950, direction=<value optimized out>, count=0, execute_once=48) at execMain.c:1723
#19 standard_ExecutorRun (queryDesc=0x2940950, direction=<value optimized out>, count=0, execute_once=48) at
execMain.c:364
#20 0x00007ff08f74f618 in pgss_ExecutorRun (queryDesc=0x2940950, direction=ForwardScanDirection, count=0,
execute_once=true)at pg_stat_statements.c:892
 
#21 0x00007ff08f2cc81d in explain_ExecutorRun (queryDesc=0x2940950, direction=ForwardScanDirection, count=0,
execute_once=true)at auto_explain.c:281
 
#22 0x000000000075444b in PortalRunSelect (portal=0x2571f00, forward=<value optimized out>, count=0, dest=<value
optimizedout>) at pquery.c:932
 
#23 0x0000000000755631 in PortalRun (portal=0x2571f00, count=9223372036854775807, isTopLevel=true, run_once=true,
dest=0x928e7a8,altdest=0x928e7a8, completionTag=0x7ffcaa940670 "") at pquery.c:773
 
#24 0x0000000000752267 in exec_simple_query (
    query_string=0x2753e08 "--BEGIN SQL\nSELECT * FROM\n\n(SELECT site_office AS site_gran,\n \tsite_location AS
cell,\n\tdisplay_site_name(sect_name, site_name, sect_mscid) AS sitename,\n\tperiod, data_cell.bh_day,\n\n", '-'
<repeats17 times>, "Ac"...) at postgres.c:1145
 
#25 0x0000000000753211 in PostgresMain (argc=<value optimized out>, argv=<value optimized out>, dbname=0x252b1f8 "ts",
username=<valueoptimized out>) at postgres.c:4182
 
#26 0x00000000006e2587 in BackendRun (argc=<value optimized out>, argv=<value optimized out>) at postmaster.c:4358
#27 BackendStartup (argc=<value optimized out>, argv=<value optimized out>) at postmaster.c:4030
#28 ServerLoop (argc=<value optimized out>, argv=<value optimized out>) at postmaster.c:1707
#29 PostmasterMain (argc=<value optimized out>, argv=<value optimized out>) at postmaster.c:1380
#30 0x0000000000656a80 in main (argc=3, argv=0x24f7570) at main.c:228

I found this report from 9.6 last month; copying those parties.
https://www.postgresql.org/message-id/CAHyXU0xzwzgnUTyK62ZkG0_1CQsBHwnTVT2TSX7iwTEv1ve9ag%40mail.gmail.com

Merlin: what OS are you running on and what postgres package, or was it
compiled locally ?

We're running:
postgresql11-server-11.5-1PGDG.rhel6.x86_64
CentOS release 6.8
glibc-2.12-1.192.el6.x86_64
linux 2.6.32-754.3.5.el6.x86_64
DMI: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 09/21/2015
Model name:            Intel(R) Xeon(R) CPU E5-2683 v4 @ 2.10GHz

I see this is available but not installed: 2.12-1.212.el6_10.3

The VM has no known issues.

ts=# SELECT * FROM pg_config();
..
| CONFIGURE         | '--enable-rpath' '--prefix=/usr/pgsql-11' '--includedir=/usr/pgsql-11/include'
'--mandir=/usr/pgsql-11/share/man''--datadir=/usr/pgsql-11/share' '--libdir=/usr/pgsql-11/lib' '--with-icu'
'--with-perl''--with-python' '--with-tcl' '--with-tclconfig=/usr/lib64' '--with-openssl' '--with-pam' '--with-gssapi'
'--with-includes=/usr/include''--with-libraries=/usr/lib64' '--enable-nls' '--with-uuid=e2fs' '--with-libxml'
'--with-libxslt''--with-ldap' '--with-system-tzdata=/usr/share/zoneinfo' '--sysconfdir=/etc/sysconfig/pgsql'
'--docdir=/usr/pgsql-11/doc''--htmldir=/usr/pgsql-11/doc/html' 'CFLAGS=-O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2
-fexceptions-fstack-protector --param=ssp-buffer-size=4 -m64 -mtune=generic' 'LDFLAGS=-Wl,--as-needed'
'PKG_CONFIG_PATH=:/usr/lib64/pkgconfig:/usr/share/pkgconfig''ICU_CFLAGS=-I/usr/include' 'ICU_LIBS=-L/usr/lib64
-licui18n-licuuc -licudata'
 
| CC                | gcc
| CPPFLAGS          | -I/usr/include -D_GNU_SOURCE -I/usr/include/libxml2 -I/usr/include
| CFLAGS            | -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Wendif-labels
-Wmissing-format-attribute-Wformat-security -fno-strict-aliasing -fwrapv -O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2
-fexceptions-fstack-protector --param=ssp-buffer-size=4 -m64 -mtune=generic
 
| CFLAGS_SL         | -fPIC
| LDFLAGS           | -Wl,--as-needed -L/usr/lib64 -Wl,--as-needed -Wl,-rpath,'/usr/pgsql-11/lib',--enable-new-dtags
| LDFLAGS_EX        | 
| LDFLAGS_SL        | 
| LIBS              | -lpgcommon -lpgport -lpthread -lxslt -lxml2 -lpam -lssl -lcrypto -lgssapi_krb5 -lz -lreadline
-lrt-lcrypt -ldl -lm 
 

#0  0x00000039ff6325e5 in raise (sig=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:64
        resultvar = 0
        pid = <value optimized out>
        selftid = <value optimized out>
#1  0x00000039ff633dc5 in abort () at abort.c:92
        save_stage = 2
        act = {__sigaction_handler = {sa_handler = 0x7ffcaa93f5c8, sa_sigaction = 0x7ffcaa93f5c8}, sa_mask = {__val =
{140723170309552,140723170319605, 49, 249099024494, 3, 140723170309562, 6, 
 
              249099024498, 2, 140723170309550, 2, 249099017763, 1, 249099024494, 3, 140723170309558}}, sa_flags = 10,
sa_restorer= 0x39ff757872}
 
        sigs = {__val = {32, 0 <repeats 15 times>}}
#2  0x00000039ff6704f7 in __libc_message (do_abort=2, fmt=0x39ff758a60 "*** glibc detected *** %s: %s: 0x%s ***\n") at
../sysdeps/unix/sysv/linux/libc_fatal.c:198
        ap = {{gp_offset = 40, fp_offset = 48, overflow_arg_area = 0x7ffcaa93ff30, reg_save_area = 0x7ffcaa93fe40}}
        ap_copy = {{gp_offset = 16, fp_offset = 48, overflow_arg_area = 0x7ffcaa93ff30, reg_save_area =
0x7ffcaa93fe40}}
        fd = 2
        on_2 = <value optimized out>
        list = <value optimized out>
        nlist = <value optimized out>
        cp = <value optimized out>
        written = <value optimized out>
#3  0x00000039ff675f3e in malloc_printerr (action=3, str=0x39ff758df0 "double free or corruption (!prev)", ptr=<value
optimizedout>, ar_ptr=<value optimized out>) at malloc.c:6360 
        buf = "000000001d40b0a0"
        cp = <value optimized out>
#4  0x00000039ff678dd0 in _int_free (av=0x39ff98e120, p=0x1d40b090, have_lock=0) at malloc.c:4846
        size = <value optimized out>
        fb = <value optimized out>
        nextchunk = <value optimized out>
        nextsize = <value optimized out>
        nextinuse = <value optimized out>
        prevsize = <value optimized out>
        bck = <value optimized out>
        fwd = <value optimized out>
        errstr = <value optimized out>
        locked = <value optimized out>
#5  0x00000000006269e5 in ExecHashJoinNewBatch (pstate=0x2771218) at nodeHashjoin.c:1058
        nbatch = <value optimized out>
        curbatch = <value optimized out>
        slot = <value optimized out>
        hashvalue = 1756558929
        hashtable = 0x3c4ed48
        innerFile = 0x1d40b0d8
#6  ExecHashJoinImpl (pstate=0x2771218) at nodeHashjoin.c:539
        outerNode = 0x27714d8
        hashNode = 0x2772348
        econtext = <value optimized out>
        node = 0x2771218
        joinqual = 0x0
        otherqual = 0x0
---Type <return> to continue, or q <return> to quit---
        hashtable = 0xc8
        hashvalue = 0
        batchno = 41357576
        parallel_state = <value optimized out>
#7  ExecHashJoin (pstate=0x2771218) at nodeHashjoin.c:565
No locals.
#8  0x00000000006131d0 in ExecProcNodeInstr (node=0x2771218) at execProcnode.c:461
        result = <value optimized out>
#9  0x0000000000633286 in ExecProcNode (pstate=0x2771108) at ../../../src/include/executor/executor.h:247
No locals.
#10 ExecSort (pstate=0x2771108) at nodeSort.c:107
        plannode = <value optimized out>
        outerNode = 0x2771218
        tupDesc = <value optimized out>
        node = 0x2771108
        estate = 0x2770a40
        dir = ForwardScanDirection
        tuplesortstate = 0x3c87160
        slot = <value optimized out>
...


If GDB isn't lying and I'm not being confuse by git, that's:
                BufFileClose(innerFile);

Which does (at least):

        pfree(file->files);
        pfree(file->offsets);
        pfree(file);


(gdb) p *innerFile
$2 = {numFiles = 1, files = 0xa421328, offsets = 0xa421310, isInterXact = false, dirty = false, readOnly = false,
fileset= 0x0, name = 0x0, resowner = 0x24f93e0, curFile = 0, curOffset = 73016512, 
 
  pos = 0, nbytes = 0, buffer = {
    data = '\000' <repeats 44 times>,
"Q\366\262h\220\004\000\000\000\000\000\000\000\000L\000\003\000(\377\377\377\377\377\177\372\377\377\017\000\000\000\000\000\000\000\000\257\321\345\333\063\002\000\003",
'\000'<repeats 23 times>"\204,
\003\000\000\000\000\000\000K\000\000\000\000\000\000\000K\000\000\000\000\000\000\000K\000\000\000\000\000\000\000K\000\000\000\000\000\000\000b\000\000\000\000\000\000\000c\000\000\000\000\000\000\000c\000\000\000\000\000\000\000d",
'\000'<repeats 15 times>, "q\002\000\000\000\000\000\000d", '\000' <repeats 23 times>,
"I\000\000\000\000\000\000\000\344H\a\000\000\000\000\000\017\000\210\026\000\310\024\000\024H\000\000\000\000\000\000\017\000\210\035\000H!\027\000\210\216\000T\vB\017\304\t\027\000\210\a\000\206\b\033\030.\"",
'\000'<repeats 11 times>..., force_align_d = 0, force_align_i64 = 0}}
 

(gdb) p innerFile->files[0]
$8 = 2397





pgsql-hackers by date:

Previous
From: Floris Van Nee
Date:
Subject: Re: Optimize single tuple fetch from nbtree index
Next
From: Thomas Munro
Date:
Subject: Re: LLVM breakage on seawasp