Re: BUG #15585: infinite DynamicSharedMemoryControlLock waiting in parallel query - Mailing list pgsql-bugs

From Sergei Kornilov
Subject Re: BUG #15585: infinite DynamicSharedMemoryControlLock waiting in parallel query
Date
Msg-id 70942611548327380@myt6-7734411c649e.qloud-c.yandex.net
Whole thread Raw
In response to Re: BUG #15585: infinite DynamicSharedMemoryControlLock waiting inparallel query  (Thomas Munro <thomas.munro@enterprisedb.com>)
Responses Re: BUG #15585: infinite DynamicSharedMemoryControlLock waiting inparallel query  (Thomas Munro <thomas.munro@enterprisedb.com>)
List pgsql-bugs
Hello

Today I got this error again, same host (earlier i did not disable parallel query), same symptoms, same errors in logs,
samebacktrace on parallel leader. But now i have backtrace from last one stucked parallel worker for this query. And
thisbacktrace looks very strange for me:
 

#0  0x00007eff6dada827 in futex_abstimed_wait_cancelable (private=128, abstime=0x0, expected=0,
futex_word=0x7efb8be0ceb8)at ../sysdeps/unix/sysv/linux/futex-internal.h:205
 
#1  do_futex_wait (sem=sem@entry=0x7efb8be0ceb8, abstime=0x0) at sem_waitcommon.c:111
#2  0x00007eff6dada8d4 in __new_sem_wait_slow (sem=0x7efb8be0ceb8, abstime=0x0) at sem_waitcommon.c:181
#3  0x00007eff6dada97a in __new_sem_wait (sem=sem@entry=0x7efb8be0ceb8) at sem_wait.c:29
#4  0x00005624561c5782 in PGSemaphoreLock (sema=0x7efb8be0ceb8) at pg_sema.c:316
#5  0x0000562456238a0c in LWLockAcquire (lock=0x7efb8be0e600, mode=mode@entry=LW_EXCLUSIVE) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/storage/lmgr/lwlock.c:1233
#6  0x00005624562239f6 in dsm_detach (seg=0x5624578710c8) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/storage/ipc/dsm.c:772
#7  0x0000562456223c52 in dsm_backend_shutdown () at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/storage/ipc/dsm.c:655
#8  0x000056245622426a in shmem_exit (code=code@entry=1) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/storage/ipc/ipc.c:248
#9  0x0000562456224363 in proc_exit_prepare (code=code@entry=1) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/storage/ipc/ipc.c:185
#10 0x0000562456224400 in proc_exit (code=code@entry=1) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/storage/ipc/ipc.c:102
#11 0x0000562456359e06 in errfinish (dummy=dummy@entry=0) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/utils/error/elog.c:543
#12 0x000056245635e384 in elog_finish (elevel=elevel@entry=20, fmt=fmt@entry=0x5624564be7e0 "cannot unpin a segment
thatis not pinned")
 
    at /build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/utils/error/elog.c:1378
#13 0x0000562456224031 in dsm_unpin_segment (handle=1322526153) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/storage/ipc/dsm.c:929
#14 0x000056245637e747 in dsa_release_in_place (place=0x7eff6de081f8) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/utils/mmgr/dsa.c:618
#15 0x0000562456223980 in dsm_detach (seg=0x5624578710c8) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/storage/ipc/dsm.c:746
#16 0x0000562456223c52 in dsm_backend_shutdown () at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/storage/ipc/dsm.c:655
#17 0x000056245622426a in shmem_exit (code=code@entry=1) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/storage/ipc/ipc.c:248
#18 0x0000562456224363 in proc_exit_prepare (code=code@entry=1) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/storage/ipc/ipc.c:185
#19 0x0000562456224400 in proc_exit (code=code@entry=1) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/storage/ipc/ipc.c:102
#20 0x0000562456359e06 in errfinish (dummy=<optimized out>) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/utils/error/elog.c:543
#21 0x0000562456245dbb in ProcessInterrupts () at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/tcop/postgres.c:3034
#22 0x000056245611472d in ExecScanFetch (recheckMtd=0x56245611f800 <BitmapHeapRecheck>, accessMtd=0x56245611f890
<BitmapHeapNext>,node=0x562457958fa8)
 
    at /build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/executor/execScan.c:43
#23 ExecScan (node=0x562457958fa8, accessMtd=0x56245611f890 <BitmapHeapNext>, recheckMtd=0x56245611f800
<BitmapHeapRecheck>)at /build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/executor/execScan.c:164
 
#24 0x000056245611a3b3 in ExecProcNode (node=0x562457958fa8) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/include/executor/executor.h:250
#25 ExecAppend (pstate=0x5624579191f8) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/executor/nodeAppend.c:222
#26 0x000056245611a70c in ExecProcNode (node=0x5624579191f8) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/include/executor/executor.h:250
#27 fetch_input_tuple (aggstate=aggstate@entry=0x562457919430) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/executor/nodeAgg.c:695
#28 0x000056245611c866 in agg_retrieve_direct (aggstate=0x562457919430) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/executor/nodeAgg.c:2448
#29 ExecAgg (pstate=0x562457919430) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/executor/nodeAgg.c:2158
#30 0x000056245610e50b in ExecProcNode (node=0x562457919430) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/include/executor/executor.h:250
#31 ExecutePlan (execute_once=<optimized out>, dest=0x5624578eacf8, direction=<optimized out>, numberTuples=0,
sendTuples=<optimizedout>, operation=CMD_SELECT, use_parallel_mode=<optimized out>, planstate=0x562457919430, 
 
    estate=0x5624579188f8) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/executor/execMain.c:1723
#32 standard_ExecutorRun (queryDesc=0x56245794d2d8, direction=<optimized out>, count=0, execute_once=<optimized out>)
at/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/executor/execMain.c:364
 
#33 0x00007eff64598ea5 in pgss_ExecutorRun (queryDesc=0x56245794d2d8, direction=ForwardScanDirection, count=0,
execute_once=<optimizedout>)
 
    at /build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../contrib/pg_stat_statements/pg_stat_statements.c:889
#34 0x0000562456112dfd in ParallelQueryMain (seg=<optimized out>, toc=0x7eff6de08000) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/executor/execParallel.c:996
#35 0x0000562455fef844 in ParallelWorkerMain (main_arg=<optimized out>) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/access/transam/parallel.c:1214
#36 0x00005624561c87b8 in StartBackgroundWorker () at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/postmaster/bgworker.c:828
#37 0x00005624561d55fb in do_start_bgworker (rw=<optimized out>) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/postmaster/postmaster.c:5728
#38 maybe_start_bgworkers () at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/postmaster/postmaster.c:5941
#39 0x00005624561d62b5 in sigusr1_handler (postgres_signal_arg=<optimized out>) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/postmaster/postmaster.c:5121
#40 <signal handler called>
#41 0x00007eff6b7e15b3 in __select_nocancel () at ../sysdeps/unix/syscall-template.S:84
#42 0x0000562455f7eabc in ServerLoop () at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/postmaster/postmaster.c:1719
#43 0x00005624561d76bb in PostmasterMain (argc=5, argv=<optimized out>) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/postmaster/postmaster.c:1363
#44 0x0000562455f80905 in main (argc=5, argv=0x56245783f850) at
/build/postgresql-10-4UiiBA/postgresql-10-10.6/build/../src/backend/main/main.c:228

We should not call dsm_backend_shutdown twice in same process, right? So we tried call dsm_detach on same segment
0x5624578710c8twice, but this is unexpected behavior and refcnt would be incorrect. And seems we can not LWLockAcquire
lockand then LWLockAcquire same lock again without release. And here we have infinite waiting.
 

regards, Sergei


pgsql-bugs by date:

Previous
From:
Date:
Subject: Suggestion: include interruption method for \watch option (page 1922, PostgreSQL 11.1 Documentation)
Next
From: Francisco Olarte
Date:
Subject: Re: Suggestion: include interruption method for \watch option (page1922, PostgreSQL 11.1 Documentation)