Re: Issues with ON CONFLICT UPDATE and REINDEX CONCURRENTLY - Mailing list pgsql-hackers

From Michail Nikolaev
Subject Re: Issues with ON CONFLICT UPDATE and REINDEX CONCURRENTLY
Date
Msg-id CANtu0ogs10w=DgbYzZ8MswXE3PUC3J4SGDc0YEuZZeWbL0b6HA@mail.gmail.com
Whole thread Raw
In response to Re: Issues with ON CONFLICT UPDATE and REINDEX CONCURRENTLY  (Michail Nikolaev <michail.nikolaev@gmail.com>)
Responses Re: Issues with ON CONFLICT UPDATE and REINDEX CONCURRENTLY
List pgsql-hackers
Hello, Noah!


> On your other thread, it would be useful to see stack traces from the high-CPU
> processes once the live lock has ended all query completion.

I was wrong, it is not a livelock, it is a deadlock, actually. I missed it because pgbench retries deadlocks automatically.

It looks like this:

2024-06-25 17:16:17.447 CEST [711743] 007_concurrently_unique_stuck.pl ERROR:  deadlock detected
2024-06-25 17:16:17.447 CEST [711743] 007_concurrently_unique_stuck.pl DETAIL:  Process 711743 waits for ShareLock on transaction 3633; blocked by process 711749.
Process 711749 waits for ShareLock on speculative token 2 of transaction 3622; blocked by process 711743.
Process 711743: INSERT INTO tbl VALUES(13,89318) on conflict(i) do update set n = tbl.n + 1 RETURNING n
Process 711749: INSERT INTO tbl VALUES(13,41011) on conflict(i) do update set n = tbl.n + 1 RETURNING n
2024-06-25 17:16:17.447 CEST [711743] 007_concurrently_unique_stuck.pl HINT:  See server log for query details.
2024-06-25 17:16:17.447 CEST [711743] 007_concurrently_unique_stuck.pl CONTEXT:  while inserting index tuple (15,145) in relation "tbl_pkey_ccnew"
2024-06-25 17:16:17.447 CEST [711743] 007_concurrently_unique_stuck.pl STATEMENT:  INSERT INTO tbl VALUES(13,89318) on conflict(i) do update set n = tbl.n + 1 RETURNING n

Stacktraces:

-------------------------

INSERT INTO tbl VALUES(13,41011) on conflict(i) do update set n = tbl.n + 1 RETURNING n

#0  in epoll_wait (epfd=5, events=0x1203328, maxevents=1, timeout=-1) at ../sysdeps/unix/sysv/linux/epoll_wait.c:30
#1  in WaitEventSetWaitBlock (set=0x12032c0, cur_timeout=-1, occurred_events=0x7ffcc4e38e30, nevents=1) at latch.c:1570
#2  in WaitEventSetWait (set=0x12032c0, timeout=-1, occurred_events=0x7ffcc4e38e30, nevents=1, wait_event_info=50331655) at latch.c:1516
#3  in WaitLatch (latch=0x7acb2a2f5f14, wakeEvents=33, timeout=0, wait_event_info=50331655) at latch.c:538
#4  in ProcSleep (locallock=0x122f778, lockMethodTable=0x1037340 <default_lockmethod>, dontWait=false) at proc.c:1355
#5  in WaitOnLock (locallock=0x122f778, owner=0x1247408, dontWait=false) at lock.c:1833
#6  in LockAcquireExtended (locktag=0x7ffcc4e39220, lockmode=5, sessionLock=false, dontWait=false, reportMemoryError=true, locallockp=0x0) at lock.c:1046
#7  in LockAcquire (locktag=0x7ffcc4e39220, lockmode=5, sessionLock=false, dontWait=false) at lock.c:739
#8  in SpeculativeInsertionWait (xid=3622, token=2) at lmgr.c:833
#9  in _bt_doinsert (rel=0x7acb2dbb12e8, itup=0x12f1308, checkUnique=UNIQUE_CHECK_YES, indexUnchanged=true, heapRel=0x7acb2dbb0f08) at nbtinsert.c:225
#10 in btinsert (rel=0x7acb2dbb12e8, values=0x7ffcc4e39440, isnull=0x7ffcc4e39420, ht_ctid=0x12ebe20, heapRel=0x7acb2dbb0f08, checkUnique=UNIQUE_CHECK_YES, indexUnchanged=true, indexInfo=0x12f08a8) at nbtree.c:195
#11 in index_insert (indexRelation=0x7acb2dbb12e8, values=0x7ffcc4e39440, isnull=0x7ffcc4e39420, heap_t_ctid=0x12ebe20, heapRelation=0x7acb2dbb0f08, checkUnique=UNIQUE_CHECK_YES, indexUnchanged=true, indexInfo=0x12f08a8) at indexam.c:230
#12 in ExecInsertIndexTuples (resultRelInfo=0x12eaa00, slot=0x12ebdf0, estate=0x12ea560, update=true, noDupErr=false, specConflict=0x0, arbiterIndexes=0x0, onlySummarizing=false) at execIndexing.c:438
#13 in ExecUpdateEpilogue (context=0x7ffcc4e39870, updateCxt=0x7ffcc4e3962c, resultRelInfo=0x12eaa00, tupleid=0x7ffcc4e39732, oldtuple=0x0, slot=0x12ebdf0) at nodeModifyTable.c:2130
#14 in ExecUpdate (context=0x7ffcc4e39870, resultRelInfo=0x12eaa00, tupleid=0x7ffcc4e39732, oldtuple=0x0, slot=0x12ebdf0, canSetTag=true) at nodeModifyTable.c:2478
#15 in ExecOnConflictUpdate (context=0x7ffcc4e39870, resultRelInfo=0x12eaa00, conflictTid=0x7ffcc4e39732, excludedSlot=0x12f05b8, canSetTag=true, returning=0x7ffcc4e39738) at nodeModifyTable.c:2694
#16 in ExecInsert (context=0x7ffcc4e39870, resultRelInfo=0x12eaa00, slot=0x12f05b8, canSetTag=true, inserted_tuple=0x0, insert_destrel=0x0) at nodeModifyTable.c:1048
#17 in ExecModifyTable (pstate=0x12ea7f0) at nodeModifyTable.c:4059
#18 in ExecProcNodeFirst (node=0x12ea7f0) at execProcnode.c:464
#19 in ExecProcNode (node=0x12ea7f0) at ../../../src/include/executor/executor.h:274
#20 in ExecutePlan (estate=0x12ea560, planstate=0x12ea7f0, use_parallel_mode=false, operation=CMD_INSERT, sendTuples=true, numberTuples=0, direction=ForwardScanDirection, dest=0x12daac8, execute_once=true) at execMain.c:1646
#21 in standard_ExecutorRun (queryDesc=0x12dab58, direction=ForwardScanDirection, count=0, execute_once=true) at execMain.c:363
#22 in ExecutorRun (queryDesc=0x12dab58, direction=ForwardScanDirection, count=0, execute_once=true) at execMain.c:304
#23 in ProcessQuery (plan=0x12e1360, sourceText=0x12083b0 "INSERT INTO tbl VALUES(13,41011) on conflict(i) do update set n = tbl.n + 1 RETURNING n ", params=0x0, queryEnv=0x0, dest=0x12daac8, qc=0x7ffcc4e39ae0) at pquery.c:160
#24 in PortalRunMulti (portal=0x1289c90, isTopLevel=true, setHoldSnapshot=true, dest=0x12daac8, altdest=0x10382a0 <donothingDR>, qc=0x7ffcc4e39ae0) at pquery.c:1277
#25 in FillPortalStore (portal=0x1289c90, isTopLevel=true) at pquery.c:1026
#26 in PortalRun (portal=0x1289c90, count=9223372036854775807, isTopLevel=true, run_once=true, dest=0x12e14c0, altdest=0x12e14c0, qc=0x7ffcc4e39d30) at pquery.c:763
#27 in exec_simple_query (query_string=0x12083b0 "INSERT INTO tbl VALUES(13,41011) on conflict(i) do update set n = tbl.n + 1 RETURNING n ") at postgres.c:1274


-------------------------

INSERT INTO tbl VALUES(13,89318) on conflict(i) do update set n = tbl.n + 1 RETURNING n

#0  in epoll_wait (epfd=5, events=0x1203328, maxevents=1, timeout=-1) at ../sysdeps/unix/sysv/linux/epoll_wait.c:30
#1  in WaitEventSetWaitBlock (set=0x12032c0, cur_timeout=-1, occurred_events=0x7ffcc4e38f60, nevents=1) at latch.c:1570
#2  in WaitEventSetWait (set=0x12032c0, timeout=-1, occurred_events=0x7ffcc4e38f60, nevents=1, wait_event_info=50331653) at latch.c:1516
#3  in WaitLatch (latch=0x7acb2a2f4dbc, wakeEvents=33, timeout=0, wait_event_info=50331653) at latch.c:538
#4  in ProcSleep (locallock=0x122f670, lockMethodTable=0x1037340 <default_lockmethod>, dontWait=false) at proc.c:1355
#5  in WaitOnLock (locallock=0x122f670, owner=0x1247408, dontWait=false) at lock.c:1833
#6  in LockAcquireExtended (locktag=0x7ffcc4e39370, lockmode=5, sessionLock=false, dontWait=false, reportMemoryError=true, locallockp=0x0) at lock.c:1046
#7  in LockAcquire (locktag=0x7ffcc4e39370, lockmode=5, sessionLock=false, dontWait=false) at lock.c:739
#8  in XactLockTableWait (xid=3633, rel=0x7acb2dba66d8, ctid=0x1240a68, oper=XLTW_InsertIndex) at lmgr.c:701
#9  in _bt_doinsert (rel=0x7acb2dba66d8, itup=0x1240a68, checkUnique=UNIQUE_CHECK_YES, indexUnchanged=false, heapRel=0x7acb2dbb0f08) at nbtinsert.c:227
#10 in btinsert (rel=0x7acb2dba66d8, values=0x7ffcc4e395c0, isnull=0x7ffcc4e395a0, ht_ctid=0x12400e8, heapRel=0x7acb2dbb0f08, checkUnique=UNIQUE_CHECK_YES, indexUnchanged=false, indexInfo=0x1240500) at nbtree.c:195
#11 in index_insert (indexRelation=0x7acb2dba66d8, values=0x7ffcc4e395c0, isnull=0x7ffcc4e395a0, heap_t_ctid=0x12400e8, heapRelation=0x7acb2dbb0f08, checkUnique=UNIQUE_CHECK_YES, indexUnchanged=false, indexInfo=0x1240500) at indexam.c:230
#12 in ExecInsertIndexTuples (resultRelInfo=0x12eaa00, slot=0x12400b8, estate=0x12ea560, update=false, noDupErr=true, specConflict=0x7ffcc4e39722, arbiterIndexes=0x12e0998, onlySummarizing=false) at execIndexing.c:438
#13 in ExecInsert (context=0x7ffcc4e39870, resultRelInfo=0x12eaa00, slot=0x12400b8, canSetTag=true, inserted_tuple=0x0, insert_destrel=0x0) at nodeModifyTable.c:1095
#14 in ExecModifyTable (pstate=0x12ea7f0) at nodeModifyTable.c:4059
#15 in ExecProcNodeFirst (node=0x12ea7f0) at execProcnode.c:464
#16 in ExecProcNode (node=0x12ea7f0) at ../../../src/include/executor/executor.h:274
#17 in ExecutePlan (estate=0x12ea560, planstate=0x12ea7f0, use_parallel_mode=false, operation=CMD_INSERT, sendTuples=true, numberTuples=0, direction=ForwardScanDirection, dest=0x12daac8, execute_once=true) at execMain.c:1646
#18 in standard_ExecutorRun (queryDesc=0x12dab58, direction=ForwardScanDirection, count=0, execute_once=true) at execMain.c:363
#19 in ExecutorRun (queryDesc=0x12dab58, direction=ForwardScanDirection, count=0, execute_once=true) at execMain.c:304
#20 in ProcessQuery (plan=0x12e1360, sourceText=0x12083b0 "INSERT INTO tbl VALUES(13,89318) on conflict(i) do update set n = tbl.n + 1 RETURNING n ", params=0x0, queryEnv=0x0, dest=0x12daac8, qc=0x7ffcc4e39ae0) at pquery.c:160
#21 in PortalRunMulti (portal=0x1289c90, isTopLevel=true, setHoldSnapshot=true, dest=0x12daac8, altdest=0x10382a0 <donothingDR>, qc=0x7ffcc4e39ae0) at pquery.c:1277
#22 in FillPortalStore (portal=0x1289c90, isTopLevel=true) at pquery.c:1026
#23 in PortalRun (portal=0x1289c90, count=9223372036854775807, isTopLevel=true, run_once=true, dest=0x12e14c0, altdest=0x12e14c0, qc=0x7ffcc4e39d30) at pquery.c:763
#24 in exec_simple_query (query_string=0x12083b0 "INSERT INTO tbl VALUES(13,89318) on conflict(i) do update set n = tbl.n + 1 RETURNING n ") at postgres.c:1274

-------------------------

Also, at that time (but not reported in deadlock) reindex is happening. Without reindex I am unable to reproduce deadlock.

#0  in epoll_wait (epfd=5, events=0x1203328, maxevents=1, timeout=-1) at ../sysdeps/unix/sysv/linux/epoll_wait.c:30
#1  in WaitEventSetWaitBlock (set=0x12032c0, cur_timeout=-1, occurred_events=0x7ffcc4e38cd0, nevents=1) at latch.c:1570
#2  in WaitEventSetWait (set=0x12032c0, timeout=-1, occurred_events=0x7ffcc4e38cd0, nevents=1, wait_event_info=50331654) at latch.c:1516
#3  in WaitLatch (latch=0x7acb2a2ff0c4, wakeEvents=33, timeout=0, wait_event_info=50331654) at latch.c:538
#4  in ProcSleep (locallock=0x122f358, lockMethodTable=0x1037340 <default_lockmethod>, dontWait=false) at proc.c:1355
#5  in WaitOnLock (locallock=0x122f358, owner=0x12459f0, dontWait=false) at lock.c:1833
#6  in LockAcquireExtended (locktag=0x7ffcc4e390e0, lockmode=5, sessionLock=false, dontWait=false, reportMemoryError=true, locallockp=0x0) at lock.c:1046
#7  in LockAcquire (locktag=0x7ffcc4e390e0, lockmode=5, sessionLock=false, dontWait=false) at lock.c:739
#8  in VirtualXactLock (vxid=..., wait=true) at lock.c:4627
#9  in WaitForLockersMultiple (locktags=0x12327a8, lockmode=8, progress=true) at lmgr.c:955
#10 in ReindexRelationConcurrently (stmt=0x1208e08, relationOid=16401, params=0x7ffcc4e39528) at indexcmds.c:4154
#11 in ReindexIndex (stmt=0x1208e08, params=0x7ffcc4e39528, isTopLevel=true) at indexcmds.c:2814
#12 in ExecReindex (pstate=0x12329f0, stmt=0x1208e08, isTopLevel=true) at indexcmds.c:2743
#13 in ProcessUtilitySlow (pstate=0x12329f0, pstmt=0x1208f58, queryString=0x12083b0 "REINDEX INDEX CONCURRENTLY tbl_pkey;", context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0, dest=0x1209318, qc=0x7ffcc4e39d30) at utility.c:1567
#14 in standard_ProcessUtility (pstmt=0x1208f58, queryString=0x12083b0 "REINDEX INDEX CONCURRENTLY tbl_pkey;", readOnlyTree=false, context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0, dest=0x1209318, qc=0x7ffcc4e39d30) at utility.c:1067
#15 in ProcessUtility (pstmt=0x1208f58, queryString=0x12083b0 "REINDEX INDEX CONCURRENTLY tbl_pkey;", readOnlyTree=false, context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0, dest=0x1209318, qc=0x7ffcc4e39d30) at utility.c:523
#16 in PortalRunUtility (portal=0x1289c90, pstmt=0x1208f58, isTopLevel=true, setHoldSnapshot=false, dest=0x1209318, qc=0x7ffcc4e39d30) at pquery.c:1158
#17 in PortalRunMulti (portal=0x1289c90, isTopLevel=true, setHoldSnapshot=false, dest=0x1209318, altdest=0x1209318, qc=0x7ffcc4e39d30) at pquery.c:1315
#18 in PortalRun (portal=0x1289c90, count=9223372036854775807, isTopLevel=true, run_once=true, dest=0x1209318, altdest=0x1209318, qc=0x7ffcc4e39d30) at pquery.c:791
#19 in exec_simple_query (query_string=0x12083b0 "REINDEX INDEX CONCURRENTLY tbl_pkey;") at postgres.c:1274


It looks like a deadlock caused by different set of indexes being used as arbiter indexes (or by the different order).

Best regards,
Mikhail.



pgsql-hackers by date:

Previous
From: Jacob Champion
Date:
Subject: Re: Direct SSL connection and ALPN loose ends
Next
From: Andrew Dunstan
Date:
Subject: Re: Backporting BackgroundPsql