Re: Issues with ON CONFLICT UPDATE and REINDEX CONCURRENTLY

From: Michail Nikolaev <michail(dot)nikolaev(at)gmail(dot)com>
To: Michael Paquier <michael(at)paquier(dot)xyz>, Noah Misch <noah(at)leadboat(dot)com>
Cc: PostgreSQL Hackers <pgsql-hackers(at)postgresql(dot)org>
Subject: Re: Issues with ON CONFLICT UPDATE and REINDEX CONCURRENTLY
Date: 2024-06-25 15:57:22
Message-ID: CANtu0ogs10w=DgbYzZ8MswXE3PUC3J4SGDc0YEuZZeWbL0b6HA@mail.gmail.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers

Hello, Noah!

Answering
https://www.postgresql.org/message-id/flat/20240612194857.1c.nmisch%40google.com#684361ba86bad11f4e9fd84dfa8e0084

> On your other thread, it would be useful to see stack traces from the
high-CPU
> processes once the live lock has ended all query completion.

I was wrong, it is not a livelock, it is a deadlock, actually. I missed it
because pgbench retries deadlocks automatically.

It looks like this:

2024-06-25 17:16:17.447 CEST [711743] 007_concurrently_unique_stuck.pl
ERROR: deadlock detected
2024-06-25 17:16:17.447 CEST [711743] 007_concurrently_unique_stuck.pl
DETAIL: Process 711743 waits for ShareLock on transaction 3633; blocked by
process 711749.
Process 711749 waits for ShareLock on speculative token 2 of transaction
3622; blocked by process 711743.
Process 711743: INSERT INTO tbl VALUES(13,89318) on conflict(i) do update
set n = tbl.n + 1 RETURNING n
Process 711749: INSERT INTO tbl VALUES(13,41011) on conflict(i) do update
set n = tbl.n + 1 RETURNING n
2024-06-25 17:16:17.447 CEST [711743] 007_concurrently_unique_stuck.pl
HINT: See server log for query details.
2024-06-25 17:16:17.447 CEST [711743] 007_concurrently_unique_stuck.pl
CONTEXT: while inserting index tuple (15,145) in relation "tbl_pkey_ccnew"
2024-06-25 17:16:17.447 CEST [711743] 007_concurrently_unique_stuck.pl
STATEMENT: INSERT INTO tbl VALUES(13,89318) on conflict(i) do update set n
= tbl.n + 1 RETURNING n

Stacktraces:

-------------------------

INSERT INTO tbl VALUES(13,41011) on conflict(i) do update set n = tbl.n + 1
RETURNING n

#0 in epoll_wait (epfd=5, events=0x1203328, maxevents=1, timeout=-1) at
../sysdeps/unix/sysv/linux/epoll_wait.c:30
#1 in WaitEventSetWaitBlock (set=0x12032c0, cur_timeout=-1,
occurred_events=0x7ffcc4e38e30, nevents=1) at latch.c:1570
#2 in WaitEventSetWait (set=0x12032c0, timeout=-1,
occurred_events=0x7ffcc4e38e30, nevents=1, wait_event_info=50331655) at
latch.c:1516
#3 in WaitLatch (latch=0x7acb2a2f5f14, wakeEvents=33, timeout=0,
wait_event_info=50331655) at latch.c:538
#4 in ProcSleep (locallock=0x122f778, lockMethodTable=0x1037340
<default_lockmethod>, dontWait=false) at proc.c:1355
#5 in WaitOnLock (locallock=0x122f778, owner=0x1247408, dontWait=false) at
lock.c:1833
#6 in LockAcquireExtended (locktag=0x7ffcc4e39220, lockmode=5,
sessionLock=false, dontWait=false, reportMemoryError=true, locallockp=0x0)
at lock.c:1046
#7 in LockAcquire (locktag=0x7ffcc4e39220, lockmode=5, sessionLock=false,
dontWait=false) at lock.c:739
#8 in SpeculativeInsertionWait (xid=3622, token=2) at lmgr.c:833
#9 in _bt_doinsert (rel=0x7acb2dbb12e8, itup=0x12f1308,
checkUnique=UNIQUE_CHECK_YES, indexUnchanged=true, heapRel=0x7acb2dbb0f08)
at nbtinsert.c:225
#10 in btinsert (rel=0x7acb2dbb12e8, values=0x7ffcc4e39440,
isnull=0x7ffcc4e39420, ht_ctid=0x12ebe20, heapRel=0x7acb2dbb0f08,
checkUnique=UNIQUE_CHECK_YES, indexUnchanged=true, indexInfo=0x12f08a8) at
nbtree.c:195
#11 in index_insert (indexRelation=0x7acb2dbb12e8, values=0x7ffcc4e39440,
isnull=0x7ffcc4e39420, heap_t_ctid=0x12ebe20, heapRelation=0x7acb2dbb0f08,
checkUnique=UNIQUE_CHECK_YES, indexUnchanged=true, indexInfo=0x12f08a8) at
indexam.c:230
#12 in ExecInsertIndexTuples (resultRelInfo=0x12eaa00, slot=0x12ebdf0,
estate=0x12ea560, update=true, noDupErr=false, specConflict=0x0,
arbiterIndexes=0x0, onlySummarizing=false) at execIndexing.c:438
#13 in ExecUpdateEpilogue (context=0x7ffcc4e39870,
updateCxt=0x7ffcc4e3962c, resultRelInfo=0x12eaa00, tupleid=0x7ffcc4e39732,
oldtuple=0x0, slot=0x12ebdf0) at nodeModifyTable.c:2130
#14 in ExecUpdate (context=0x7ffcc4e39870, resultRelInfo=0x12eaa00,
tupleid=0x7ffcc4e39732, oldtuple=0x0, slot=0x12ebdf0, canSetTag=true) at
nodeModifyTable.c:2478
#15 in ExecOnConflictUpdate (context=0x7ffcc4e39870,
resultRelInfo=0x12eaa00, conflictTid=0x7ffcc4e39732,
excludedSlot=0x12f05b8, canSetTag=true, returning=0x7ffcc4e39738) at
nodeModifyTable.c:2694
#16 in ExecInsert (context=0x7ffcc4e39870, resultRelInfo=0x12eaa00,
slot=0x12f05b8, canSetTag=true, inserted_tuple=0x0, insert_destrel=0x0) at
nodeModifyTable.c:1048
#17 in ExecModifyTable (pstate=0x12ea7f0) at nodeModifyTable.c:4059
#18 in ExecProcNodeFirst (node=0x12ea7f0) at execProcnode.c:464
#19 in ExecProcNode (node=0x12ea7f0) at
../../../src/include/executor/executor.h:274
#20 in ExecutePlan (estate=0x12ea560, planstate=0x12ea7f0,
use_parallel_mode=false, operation=CMD_INSERT, sendTuples=true,
numberTuples=0, direction=ForwardScanDirection, dest=0x12daac8,
execute_once=true) at execMain.c:1646
#21 in standard_ExecutorRun (queryDesc=0x12dab58,
direction=ForwardScanDirection, count=0, execute_once=true) at
execMain.c:363
#22 in ExecutorRun (queryDesc=0x12dab58, direction=ForwardScanDirection,
count=0, execute_once=true) at execMain.c:304
#23 in ProcessQuery (plan=0x12e1360, sourceText=0x12083b0 "INSERT INTO tbl
VALUES(13,41011) on conflict(i) do update set n = tbl.n + 1 RETURNING n ",
params=0x0, queryEnv=0x0, dest=0x12daac8, qc=0x7ffcc4e39ae0) at pquery.c:160
#24 in PortalRunMulti (portal=0x1289c90, isTopLevel=true,
setHoldSnapshot=true, dest=0x12daac8, altdest=0x10382a0 <donothingDR>,
qc=0x7ffcc4e39ae0) at pquery.c:1277
#25 in FillPortalStore (portal=0x1289c90, isTopLevel=true) at pquery.c:1026
#26 in PortalRun (portal=0x1289c90, count=9223372036854775807,
isTopLevel=true, run_once=true, dest=0x12e14c0, altdest=0x12e14c0,
qc=0x7ffcc4e39d30) at pquery.c:763
#27 in exec_simple_query (query_string=0x12083b0 "INSERT INTO tbl
VALUES(13,41011) on conflict(i) do update set n = tbl.n + 1 RETURNING n ")
at postgres.c:1274

-------------------------

INSERT INTO tbl VALUES(13,89318) on conflict(i) do update set n = tbl.n + 1
RETURNING n

#0 in epoll_wait (epfd=5, events=0x1203328, maxevents=1, timeout=-1) at
../sysdeps/unix/sysv/linux/epoll_wait.c:30
#1 in WaitEventSetWaitBlock (set=0x12032c0, cur_timeout=-1,
occurred_events=0x7ffcc4e38f60, nevents=1) at latch.c:1570
#2 in WaitEventSetWait (set=0x12032c0, timeout=-1,
occurred_events=0x7ffcc4e38f60, nevents=1, wait_event_info=50331653) at
latch.c:1516
#3 in WaitLatch (latch=0x7acb2a2f4dbc, wakeEvents=33, timeout=0,
wait_event_info=50331653) at latch.c:538
#4 in ProcSleep (locallock=0x122f670, lockMethodTable=0x1037340
<default_lockmethod>, dontWait=false) at proc.c:1355
#5 in WaitOnLock (locallock=0x122f670, owner=0x1247408, dontWait=false) at
lock.c:1833
#6 in LockAcquireExtended (locktag=0x7ffcc4e39370, lockmode=5,
sessionLock=false, dontWait=false, reportMemoryError=true, locallockp=0x0)
at lock.c:1046
#7 in LockAcquire (locktag=0x7ffcc4e39370, lockmode=5, sessionLock=false,
dontWait=false) at lock.c:739
#8 in XactLockTableWait (xid=3633, rel=0x7acb2dba66d8, ctid=0x1240a68,
oper=XLTW_InsertIndex) at lmgr.c:701
#9 in _bt_doinsert (rel=0x7acb2dba66d8, itup=0x1240a68,
checkUnique=UNIQUE_CHECK_YES, indexUnchanged=false, heapRel=0x7acb2dbb0f08)
at nbtinsert.c:227
#10 in btinsert (rel=0x7acb2dba66d8, values=0x7ffcc4e395c0,
isnull=0x7ffcc4e395a0, ht_ctid=0x12400e8, heapRel=0x7acb2dbb0f08,
checkUnique=UNIQUE_CHECK_YES, indexUnchanged=false, indexInfo=0x1240500) at
nbtree.c:195
#11 in index_insert (indexRelation=0x7acb2dba66d8, values=0x7ffcc4e395c0,
isnull=0x7ffcc4e395a0, heap_t_ctid=0x12400e8, heapRelation=0x7acb2dbb0f08,
checkUnique=UNIQUE_CHECK_YES, indexUnchanged=false, indexInfo=0x1240500) at
indexam.c:230
#12 in ExecInsertIndexTuples (resultRelInfo=0x12eaa00, slot=0x12400b8,
estate=0x12ea560, update=false, noDupErr=true, specConflict=0x7ffcc4e39722,
arbiterIndexes=0x12e0998, onlySummarizing=false) at execIndexing.c:438
#13 in ExecInsert (context=0x7ffcc4e39870, resultRelInfo=0x12eaa00,
slot=0x12400b8, canSetTag=true, inserted_tuple=0x0, insert_destrel=0x0) at
nodeModifyTable.c:1095
#14 in ExecModifyTable (pstate=0x12ea7f0) at nodeModifyTable.c:4059
#15 in ExecProcNodeFirst (node=0x12ea7f0) at execProcnode.c:464
#16 in ExecProcNode (node=0x12ea7f0) at
../../../src/include/executor/executor.h:274
#17 in ExecutePlan (estate=0x12ea560, planstate=0x12ea7f0,
use_parallel_mode=false, operation=CMD_INSERT, sendTuples=true,
numberTuples=0, direction=ForwardScanDirection, dest=0x12daac8,
execute_once=true) at execMain.c:1646
#18 in standard_ExecutorRun (queryDesc=0x12dab58,
direction=ForwardScanDirection, count=0, execute_once=true) at
execMain.c:363
#19 in ExecutorRun (queryDesc=0x12dab58, direction=ForwardScanDirection,
count=0, execute_once=true) at execMain.c:304
#20 in ProcessQuery (plan=0x12e1360, sourceText=0x12083b0 "INSERT INTO tbl
VALUES(13,89318) on conflict(i) do update set n = tbl.n + 1 RETURNING n ",
params=0x0, queryEnv=0x0, dest=0x12daac8, qc=0x7ffcc4e39ae0) at pquery.c:160
#21 in PortalRunMulti (portal=0x1289c90, isTopLevel=true,
setHoldSnapshot=true, dest=0x12daac8, altdest=0x10382a0 <donothingDR>,
qc=0x7ffcc4e39ae0) at pquery.c:1277
#22 in FillPortalStore (portal=0x1289c90, isTopLevel=true) at pquery.c:1026
#23 in PortalRun (portal=0x1289c90, count=9223372036854775807,
isTopLevel=true, run_once=true, dest=0x12e14c0, altdest=0x12e14c0,
qc=0x7ffcc4e39d30) at pquery.c:763
#24 in exec_simple_query (query_string=0x12083b0 "INSERT INTO tbl
VALUES(13,89318) on conflict(i) do update set n = tbl.n + 1 RETURNING n ")
at postgres.c:1274

-------------------------

Also, at that time (but not reported in deadlock) reindex is happening.
Without reindex I am unable to reproduce deadlock.

#0 in epoll_wait (epfd=5, events=0x1203328, maxevents=1, timeout=-1) at
../sysdeps/unix/sysv/linux/epoll_wait.c:30
#1 in WaitEventSetWaitBlock (set=0x12032c0, cur_timeout=-1,
occurred_events=0x7ffcc4e38cd0, nevents=1) at latch.c:1570
#2 in WaitEventSetWait (set=0x12032c0, timeout=-1,
occurred_events=0x7ffcc4e38cd0, nevents=1, wait_event_info=50331654) at
latch.c:1516
#3 in WaitLatch (latch=0x7acb2a2ff0c4, wakeEvents=33, timeout=0,
wait_event_info=50331654) at latch.c:538
#4 in ProcSleep (locallock=0x122f358, lockMethodTable=0x1037340
<default_lockmethod>, dontWait=false) at proc.c:1355
#5 in WaitOnLock (locallock=0x122f358, owner=0x12459f0, dontWait=false) at
lock.c:1833
#6 in LockAcquireExtended (locktag=0x7ffcc4e390e0, lockmode=5,
sessionLock=false, dontWait=false, reportMemoryError=true, locallockp=0x0)
at lock.c:1046
#7 in LockAcquire (locktag=0x7ffcc4e390e0, lockmode=5, sessionLock=false,
dontWait=false) at lock.c:739
#8 in VirtualXactLock (vxid=..., wait=true) at lock.c:4627
#9 in WaitForLockersMultiple (locktags=0x12327a8, lockmode=8,
progress=true) at lmgr.c:955
#10 in ReindexRelationConcurrently (stmt=0x1208e08, relationOid=16401,
params=0x7ffcc4e39528) at indexcmds.c:4154
#11 in ReindexIndex (stmt=0x1208e08, params=0x7ffcc4e39528,
isTopLevel=true) at indexcmds.c:2814
#12 in ExecReindex (pstate=0x12329f0, stmt=0x1208e08, isTopLevel=true) at
indexcmds.c:2743
#13 in ProcessUtilitySlow (pstate=0x12329f0, pstmt=0x1208f58,
queryString=0x12083b0 "REINDEX INDEX CONCURRENTLY tbl_pkey;",
context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0, dest=0x1209318,
qc=0x7ffcc4e39d30) at utility.c:1567
#14 in standard_ProcessUtility (pstmt=0x1208f58, queryString=0x12083b0
"REINDEX INDEX CONCURRENTLY tbl_pkey;", readOnlyTree=false,
context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0, dest=0x1209318,
qc=0x7ffcc4e39d30) at utility.c:1067
#15 in ProcessUtility (pstmt=0x1208f58, queryString=0x12083b0 "REINDEX
INDEX CONCURRENTLY tbl_pkey;", readOnlyTree=false,
context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0, dest=0x1209318,
qc=0x7ffcc4e39d30) at utility.c:523
#16 in PortalRunUtility (portal=0x1289c90, pstmt=0x1208f58,
isTopLevel=true, setHoldSnapshot=false, dest=0x1209318, qc=0x7ffcc4e39d30)
at pquery.c:1158
#17 in PortalRunMulti (portal=0x1289c90, isTopLevel=true,
setHoldSnapshot=false, dest=0x1209318, altdest=0x1209318,
qc=0x7ffcc4e39d30) at pquery.c:1315
#18 in PortalRun (portal=0x1289c90, count=9223372036854775807,
isTopLevel=true, run_once=true, dest=0x1209318, altdest=0x1209318,
qc=0x7ffcc4e39d30) at pquery.c:791
#19 in exec_simple_query (query_string=0x12083b0 "REINDEX INDEX
CONCURRENTLY tbl_pkey;") at postgres.c:1274

It looks like a deadlock caused by different set of indexes being used as
arbiter indexes (or by the different order).

Best regards,
Mikhail.

In response to

Responses

Browse pgsql-hackers by date

  From Date Subject
Next Message Andrew Dunstan 2024-06-25 15:59:01 Re: Backporting BackgroundPsql
Previous Message Jacob Champion 2024-06-25 15:51:03 Re: Direct SSL connection and ALPN loose ends