From: | Tender Wang <tndrwang(at)gmail(dot)com> |
---|---|
To: | Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us> |
Cc: | Alvaro Herrera <alvherre(at)alvh(dot)no-ip(dot)org>, jian he <jian(dot)universality(at)gmail(dot)com>, Pg Hackers <pgsql-hackers(at)lists(dot)postgresql(dot)org> |
Subject: | Re: not null constraints, again |
Date: | 2025-04-15 03:20:53 |
Message-ID: | CAHewXNk8B4S1W0ZUcEyBWOLcPLLc3Qvi+aiz7BW4Mn_gC_15yQ@mail.gmail.com |
Views: | Raw Message | Whole Thread | Download mbox | Resend email |
Thread: | |
Lists: | pgsql-hackers |
Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us> 于2025年4月15日周二 05:39写道:
> Alvaro Herrera <alvherre(at)alvh(dot)no-ip(dot)org> writes:
> > On 2025-Apr-14, Tom Lane wrote:
> >> I would not have expected that adding pg_constraint rows implies
> >> stronger locks than what ALTER ADD PRIMARY KEY was using before,
> >> and I suspect that doing so will cause more problems than just
> >> breaking parallel restore.
>
> > I wasn't aware of this side effect. I'll investigate this in more
> > depth. I suspect it might be a bug in the way we run through ALTER
> > TABLE for the primary key.
>
> After further thought it occurs to me that it might not be a case
> of "we get stronger locks", but a case of "we accidentally get a
> weaker lock earlier and then try to upgrade it", thus creating a
> possibility of deadlock where before we'd just have blocked till
> the other statement cleared. Still worthy of being fixed if that's
> true, though.
>
I added sleep(1) in the DeadLockReport() before error report to display
the status when a deadlock happened.
bool continue_sleep = true;
do
{
sleep(1);
} while (continue_sleep);
ereport(ERROR,
(errcode(ERRCODE_T_R_DEADLOCK_DETECTED),
errmsg("deadlock detected"),
errdetail_internal("%s", clientbuf.data),
errdetail_log("%s", logbuf.data),
errhint("See server log for query details.")));
ubuntu(at)VM-0-17-ubuntu:/workspace/postgres$ ps -ef|grep postgres
ubuntu 2911109 1 0 10:34 ? 00:00:00
/workspace/pgsql/bin/postgres -D ../data
ubuntu 2911110 2911109 0 10:34 ? 00:00:00 postgres: io worker 0
ubuntu 2911111 2911109 0 10:34 ? 00:00:00 postgres: io worker 1
ubuntu 2911112 2911109 0 10:34 ? 00:00:00 postgres: io worker 2
ubuntu 2911113 2911109 0 10:34 ? 00:00:00 postgres: checkpointer
ubuntu 2911114 2911109 0 10:34 ? 00:00:00 postgres: background
writer
ubuntu 2911116 2911109 0 10:34 ? 00:00:00 postgres: walwriter
ubuntu 2911117 2911109 0 10:34 ? 00:00:00 postgres: autovacuum
launcher
ubuntu 2911118 2911109 0 10:34 ? 00:00:00 postgres: logical
replication launcher
ubuntu 2911180 2911109 0 10:34 ? 00:00:00 postgres: ubuntu target
[local] COPY waiting
ubuntu 2911184 2911109 0 10:34 ? 00:00:00 postgres: ubuntu target
[local] idle
ubuntu 2911187 2911109 0 10:34 ? 00:00:00 postgres: ubuntu target
[local] idle
ubuntu 2911188 2911109 0 10:34 ? 00:00:00 postgres: ubuntu target
[local] ALTER TABLE
ubuntu 2911189 2911109 0 10:34 ? 00:00:00 postgres: ubuntu target
[local] SELECT waiting
ubuntu 2911190 2911109 0 10:34 ? 00:00:00 postgres: ubuntu target
[local] idle
ubuntu 2911191 2911109 0 10:34 ? 00:00:00 postgres: ubuntu target
[local] TRUNCATE TABLE waiting
ubuntu 2911192 2911109 0 10:34 ? 00:00:00 postgres: ubuntu target
[local] idle
ubuntu 2911193 2911109 0 10:34 ? 00:00:00 postgres: ubuntu target
[local] SELECT waiting
ubuntu 2911194 2911109 0 10:34 ? 00:00:00 postgres: ubuntu target
[local] idle
gdb -p 2911188 // ALTER TABLE ONLY public.parent2 ADD CONSTRAINT
parent2_pkey PRIMARY KEY (id);
(gdb) bt
#0 0x00007f2f6910878a in __GI___clock_nanosleep (clock_id=clock_id(at)entry=0,
flags=flags(at)entry=0, req=req(at)entry=0x7ffc0e560e60,
rem=rem(at)entry=0x7ffc0e560e60)
at ../sysdeps/unix/sysv/linux/clock_nanosleep.c:78
#1 0x00007f2f6910d677 in __GI___nanosleep (req=req(at)entry=0x7ffc0e560e60,
rem=rem(at)entry=0x7ffc0e560e60) at ../sysdeps/unix/sysv/linux/nanosleep.c:25
#2 0x00007f2f6910d5ae in __sleep (seconds=0) at ../sysdeps/posix/sleep.c:55
#3 0x0000561cd9386100 in DeadLockReport () at deadlock.c:1136
#4 0x0000561cd9389df8 in LockAcquireExtended (locktag=0x7ffc0e5610b0,
lockmode=8, sessionLock=false, dontWait=false, reportMemoryError=true,
locallockp=0x7ffc0e5610a8, logLockFailure=false) at lock.c:1232
#5 0x0000561cd93864bc in LockRelationOid (relid=16473, lockmode=8) at
lmgr.c:115
#6 0x0000561cd8f21b20 in find_inheritance_children_extended
(parentrelId=16463, omit_detached=true, lockmode=8, detached_exist=0x0,
detached_xmin=0x0) at pg_inherits.c:213
#7 0x0000561cd8f217c1 in find_inheritance_children (parentrelId=16463,
lockmode=8) at pg_inherits.c:60
#8 0x0000561cd904dd73 in ATPrepAddPrimaryKey (wqueue=0x7ffc0e561348,
rel=0x7f2f5d7d6240, cmd=0x561cf1d2ee38, recurse=false, lockmode=8,
context=0x7ffc0e561540) at tablecmds.c:9463
#9 0x0000561cd9043906 in ATPrepCmd (wqueue=0x7ffc0e561348,
rel=0x7f2f5d7d6240, cmd=0x561cf1d2ee38, recurse=false, recursing=false,
lockmode=8, context=0x7ffc0e561540) at tablecmds.c:5079
#10 0x0000561cd90432aa in ATController (parsetree=0x561cf1d062e0,
rel=0x7f2f5d7d6240, cmds=0x561cf1d06290, recurse=false, lockmode=8,
context=0x7ffc0e561540) at tablecmds.c:4871
#11 0x0000561cd9042f3b in AlterTable (stmt=0x561cf1d062e0, lockmode=8,
context=0x7ffc0e561540) at tablecmds.c:4533
#12 0x0000561cd93bb7a8 in ProcessUtilitySlow (pstate=0x561cf1d2f9e0,
pstmt=0x561cf1d06390, queryString=0x561cf1d05570 "ALTER TABLE ONLY
public.parent2\n ADD CONSTRAINT parent2_pkey PRIMARY KEY (id);\n\n\n",
context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0,
dest=0x561cf1d06750, qc=0x7ffc0e561ba0) at utility.c:1321
#13 0x0000561cd93bb04e in standard_ProcessUtility (pstmt=0x561cf1d06390,
queryString=0x561cf1d05570 "ALTER TABLE ONLY public.parent2\n ADD
CONSTRAINT parent2_pkey PRIMARY KEY (id);\n\n\n", readOnlyTree=false,
context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0,
dest=0x561cf1d06750, qc=0x7ffc0e561ba0) at utility.c:1070
#14 0x0000561cd93b9f4c in ProcessUtility (pstmt=0x561cf1d06390,
queryString=0x561cf1d05570 "ALTER TABLE ONLY public.parent2\n ADD
CONSTRAINT parent2_pkey PRIMARY KEY (id);\n\n\n", readOnlyTree=false,
context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0,
dest=0x561cf1d06750, qc=0x7ffc0e561ba0) at utility.c:523
#15 0x0000561cd93b87a9 in PortalRunUtility (portal=0x561cf1d85d90,
pstmt=0x561cf1d06390, isTopLevel=true, setHoldSnapshot=false,
dest=0x561cf1d06750, qc=0x7ffc0e561ba0) at pquery.c:1185
#16 0x0000561cd93b8a52 in PortalRunMulti (portal=0x561cf1d85d90,
isTopLevel=true, setHoldSnapshot=false, dest=0x561cf1d06750,
altdest=0x561cf1d06750, qc=0x7ffc0e561ba0) at pquery.c:1349
#17 0x0000561cd93b7e73 in PortalRun (portal=0x561cf1d85d90,
count=9223372036854775807, isTopLevel=true, dest=0x561cf1d06750,
altdest=0x561cf1d06750, qc=0x7ffc0e561ba0) at pquery.c:820
#18 0x0000561cd93b037e in exec_simple_query (query_string=0x561cf1d05570
"ALTER TABLE ONLY public.parent2\n ADD CONSTRAINT parent2_pkey PRIMARY
KEY (id);\n\n\n") at postgres.c:1274
#19 0x0000561cd93b5b46 in PostgresMain (dbname=0x561cf1d3f580 "target",
username=0x561cf1d3f568 "ubuntu") at postgres.c:4771
#20 0x0000561cd93abab7 in BackendMain (startup_data=0x7ffc0e561e50,
startup_data_len=24) at backend_startup.c:124
#21 0x0000561cd92abf09 in postmaster_child_launch (child_type=B_BACKEND,
child_slot=2, startup_data=0x7ffc0e561e50, startup_data_len=24,
client_sock=0x7ffc0e561eb0) at launch_backend.c:290
#22 0x0000561cd92b2946 in BackendStartup (client_sock=0x7ffc0e561eb0) at
postmaster.c:3580
#23 0x0000561cd92afeb1 in ServerLoop () at postmaster.c:1702
#24 0x0000561cd92af7a2 in PostmasterMain (argc=3, argv=0x561cf1cffb00) at
postmaster.c:1400
#25 0x0000561cd914cf06 in main (argc=3, argv=0x561cf1cffb00) at main.c:227
gdb -p 2911180 // COPY public.c22 (id, ref, b) FROM stdin;
(gdb) bt
#0 0x00007f2f69148dea in epoll_wait (epfd=5, events=0x561cf1d003a8,
maxevents=1, timeout=-1) at ../sysdeps/unix/sysv/linux/epoll_wait.c:30
#1 0x0000561cd938106f in WaitEventSetWaitBlock (set=0x561cf1d00340,
cur_timeout=-1, occurred_events=0x7ffc0e561000, nevents=1) at
waiteventset.c:1190
#2 0x0000561cd9380f74 in WaitEventSetWait (set=0x561cf1d00340, timeout=-1,
occurred_events=0x7ffc0e561000, nevents=1, wait_event_info=50331648) at
waiteventset.c:1138
#3 0x0000561cd936f6a5 in WaitLatch (latch=0x7f2f665629e4, wakeEvents=33,
timeout=0, wait_event_info=50331648) at latch.c:194
#4 0x0000561cd939fdbc in ProcSleep (locallock=0x561cf1d63560) at
proc.c:1454
#5 0x0000561cd938b2c6 in WaitOnLock (locallock=0x561cf1d63560,
owner=0x561cf1d44040) at lock.c:1968
#6 0x0000561cd9389dbd in LockAcquireExtended (locktag=0x7ffc0e5613f0,
lockmode=1, sessionLock=false, dontWait=false, reportMemoryError=true,
locallockp=0x7ffc0e5613e8, logLockFailure=false) at lock.c:1217
#7 0x0000561cd93864bc in LockRelationOid (relid=16463, lockmode=1) at
lmgr.c:115
#8 0x0000561cd8daf922 in relation_open (relationId=16463, lockmode=1) at
relation.c:55
#9 0x0000561cd95a29f3 in generate_partition_qual (rel=0x7f2f5d7d6640) at
partcache.c:362
#10 0x0000561cd95a28b8 in RelationGetPartitionQual (rel=0x7f2f5d7d6640) at
partcache.c:283
#11 0x0000561cd90bd59f in ExecPartitionCheck (resultRelInfo=0x561cf1d2ead8,
slot=0x561cf1d33af8, estate=0x561cf1dde450, emitError=true) at
execMain.c:1952
#12 0x0000561cd8fd1c9f in CopyFrom (cstate=0x561cf1de23a8) at
copyfrom.c:1368
#13 0x0000561cd8fccd30 in DoCopy (pstate=0x561cf1d2f9e0,
stmt=0x561cf1d06140, stmt_location=0, stmt_len=39,
processed=0x7ffc0e561830) at copy.c:306
#14 0x0000561cd93ba623 in standard_ProcessUtility (pstmt=0x561cf1d06210,
queryString=0x561cf1d05570 "COPY public.c22 (id, ref, b) FROM stdin;\n",
readOnlyTree=false, context=PROCESS_UTILITY_TOPLEVEL, params=0x0,
queryEnv=0x0, dest=0x561cf1d065d0, qc=0x7ffc0e561ba0) at utility.c:738
#15 0x0000561cd93b9f4c in ProcessUtility (pstmt=0x561cf1d06210,
queryString=0x561cf1d05570 "COPY public.c22 (id, ref, b) FROM stdin;\n",
readOnlyTree=false, context=PROCESS_UTILITY_TOPLEVEL, params=0x0,
queryEnv=0x0,
dest=0x561cf1d065d0, qc=0x7ffc0e561ba0) at utility.c:523
#16 0x0000561cd93b87a9 in PortalRunUtility (portal=0x561cf1d85d90,
pstmt=0x561cf1d06210, isTopLevel=true, setHoldSnapshot=false,
dest=0x561cf1d065d0, qc=0x7ffc0e561ba0) at pquery.c:1185
#17 0x0000561cd93b8a52 in PortalRunMulti (portal=0x561cf1d85d90,
isTopLevel=true, setHoldSnapshot=false, dest=0x561cf1d065d0,
altdest=0x561cf1d065d0, qc=0x7ffc0e561ba0) at pquery.c:1349
#18 0x0000561cd93b7e73 in PortalRun (portal=0x561cf1d85d90,
count=9223372036854775807, isTopLevel=true, dest=0x561cf1d065d0,
altdest=0x561cf1d065d0, qc=0x7ffc0e561ba0) at pquery.c:820
#19 0x0000561cd93b037e in exec_simple_query (query_string=0x561cf1d05570
"COPY public.c22 (id, ref, b) FROM stdin;\n") at postgres.c:1274
#20 0x0000561cd93b5b46 in PostgresMain (dbname=0x561cf1d3f580 "target",
username=0x561cf1d3f568 "ubuntu") at postgres.c:4771
#21 0x0000561cd93abab7 in BackendMain (startup_data=0x7ffc0e561e50,
startup_data_len=24) at backend_startup.c:124
#22 0x0000561cd92abf09 in postmaster_child_launch (child_type=B_BACKEND,
child_slot=1, startup_data=0x7ffc0e561e50, startup_data_len=24,
client_sock=0x7ffc0e561eb0) at launch_backend.c:290
#23 0x0000561cd92b2946 in BackendStartup (client_sock=0x7ffc0e561eb0) at
postmaster.c:3580
#24 0x0000561cd92afeb1 in ServerLoop () at postmaster.c:1702
#25 0x0000561cd92af7a2 in PostmasterMain (argc=3, argv=0x561cf1cffb00) at
postmaster.c:1400
#26 0x0000561cd914cf06 in main (argc=3, argv=0x561cf1cffb00) at main.c:227
The alter table session do check its children not-null constraint using
lockmod=8, and the copy session do get partition_qual to lock parent using
lockmode =1.
I wonder if we have to use the same lockmode for checking children's
not-null constraint.
--
Thanks,
Tender Wang
From | Date | Subject | |
---|---|---|---|
Next Message | Xuneng Zhou | 2025-04-15 04:02:25 | Re: Proposal: Limitations of palloc inside checkpointer |
Previous Message | Tom Lane | 2025-04-15 02:58:28 | Recent pg_rewind test failures in buildfarm |