Re: not null constraints, again - Mailing list pgsql-hackers

From Tender Wang
Subject Re: not null constraints, again
Date
Msg-id CAHewXNk8B4S1W0ZUcEyBWOLcPLLc3Qvi+aiz7BW4Mn_gC_15yQ@mail.gmail.com
Whole thread Raw
In response to Re: not null constraints, again  (Tom Lane <tgl@sss.pgh.pa.us>)
Responses Re: not null constraints, again
List pgsql-hackers


Tom Lane <tgl@sss.pgh.pa.us> 于2025年4月15日周二 05:39写道:
Alvaro Herrera <alvherre@alvh.no-ip.org> writes:
> On 2025-Apr-14, Tom Lane wrote:
>> I would not have expected that adding pg_constraint rows implies
>> stronger locks than what ALTER ADD PRIMARY KEY was using before,
>> and I suspect that doing so will cause more problems than just
>> breaking parallel restore.

> I wasn't aware of this side effect.  I'll investigate this in more
> depth.  I suspect it might be a bug in the way we run through ALTER
> TABLE for the primary key.

After further thought it occurs to me that it might not be a case
of "we get stronger locks", but a case of "we accidentally get a
weaker lock earlier and then try to upgrade it", thus creating a
possibility of deadlock where before we'd just have blocked till
the other statement cleared.  Still worthy of being fixed if that's
true, though.

I added sleep(1) in the  DeadLockReport() before error report to display the status when a deadlock happened.
bool continue_sleep = true;
do
{
sleep(1);
} while (continue_sleep);
ereport(ERROR,
(errcode(ERRCODE_T_R_DEADLOCK_DETECTED),
errmsg("deadlock detected"),
errdetail_internal("%s", clientbuf.data),
errdetail_log("%s", logbuf.data),
errhint("See server log for query details.")));



ubuntu@VM-0-17-ubuntu:/workspace/postgres$ ps -ef|grep postgres
ubuntu   2911109       1  0 10:34 ?        00:00:00 /workspace/pgsql/bin/postgres -D ../data
ubuntu   2911110 2911109  0 10:34 ?        00:00:00 postgres: io worker 0
ubuntu   2911111 2911109  0 10:34 ?        00:00:00 postgres: io worker 1
ubuntu   2911112 2911109  0 10:34 ?        00:00:00 postgres: io worker 2
ubuntu   2911113 2911109  0 10:34 ?        00:00:00 postgres: checkpointer
ubuntu   2911114 2911109  0 10:34 ?        00:00:00 postgres: background writer
ubuntu   2911116 2911109  0 10:34 ?        00:00:00 postgres: walwriter
ubuntu   2911117 2911109  0 10:34 ?        00:00:00 postgres: autovacuum launcher
ubuntu   2911118 2911109  0 10:34 ?        00:00:00 postgres: logical replication launcher
ubuntu   2911180 2911109  0 10:34 ?        00:00:00 postgres: ubuntu target [local] COPY waiting
ubuntu   2911184 2911109  0 10:34 ?        00:00:00 postgres: ubuntu target [local] idle
ubuntu   2911187 2911109  0 10:34 ?        00:00:00 postgres: ubuntu target [local] idle
ubuntu   2911188 2911109  0 10:34 ?        00:00:00 postgres: ubuntu target [local] ALTER TABLE
ubuntu   2911189 2911109  0 10:34 ?        00:00:00 postgres: ubuntu target [local] SELECT waiting
ubuntu   2911190 2911109  0 10:34 ?        00:00:00 postgres: ubuntu target [local] idle
ubuntu   2911191 2911109  0 10:34 ?        00:00:00 postgres: ubuntu target [local] TRUNCATE TABLE waiting
ubuntu   2911192 2911109  0 10:34 ?        00:00:00 postgres: ubuntu target [local] idle
ubuntu   2911193 2911109  0 10:34 ?        00:00:00 postgres: ubuntu target [local] SELECT waiting
ubuntu   2911194 2911109  0 10:34 ?        00:00:00 postgres: ubuntu target [local] idle

gdb -p 2911188   // ALTER TABLE ONLY public.parent2  ADD CONSTRAINT parent2_pkey PRIMARY KEY (id);
(gdb) bt
#0  0x00007f2f6910878a in __GI___clock_nanosleep (clock_id=clock_id@entry=0, flags=flags@entry=0, req=req@entry=0x7ffc0e560e60, rem=rem@entry=0x7ffc0e560e60) at ../sysdeps/unix/sysv/linux/clock_nanosleep.c:78
#1  0x00007f2f6910d677 in __GI___nanosleep (req=req@entry=0x7ffc0e560e60, rem=rem@entry=0x7ffc0e560e60) at ../sysdeps/unix/sysv/linux/nanosleep.c:25
#2  0x00007f2f6910d5ae in __sleep (seconds=0) at ../sysdeps/posix/sleep.c:55
#3  0x0000561cd9386100 in DeadLockReport () at deadlock.c:1136
#4  0x0000561cd9389df8 in LockAcquireExtended (locktag=0x7ffc0e5610b0, lockmode=8, sessionLock=false, dontWait=false, reportMemoryError=true, locallockp=0x7ffc0e5610a8, logLockFailure=false) at lock.c:1232
#5  0x0000561cd93864bc in LockRelationOid (relid=16473, lockmode=8) at lmgr.c:115
#6  0x0000561cd8f21b20 in find_inheritance_children_extended (parentrelId=16463, omit_detached=true, lockmode=8, detached_exist=0x0, detached_xmin=0x0) at pg_inherits.c:213
#7  0x0000561cd8f217c1 in find_inheritance_children (parentrelId=16463, lockmode=8) at pg_inherits.c:60
#8  0x0000561cd904dd73 in ATPrepAddPrimaryKey (wqueue=0x7ffc0e561348, rel=0x7f2f5d7d6240, cmd=0x561cf1d2ee38, recurse=false, lockmode=8, context=0x7ffc0e561540) at tablecmds.c:9463
#9  0x0000561cd9043906 in ATPrepCmd (wqueue=0x7ffc0e561348, rel=0x7f2f5d7d6240, cmd=0x561cf1d2ee38, recurse=false, recursing=false, lockmode=8, context=0x7ffc0e561540) at tablecmds.c:5079
#10 0x0000561cd90432aa in ATController (parsetree=0x561cf1d062e0, rel=0x7f2f5d7d6240, cmds=0x561cf1d06290, recurse=false, lockmode=8, context=0x7ffc0e561540) at tablecmds.c:4871
#11 0x0000561cd9042f3b in AlterTable (stmt=0x561cf1d062e0, lockmode=8, context=0x7ffc0e561540) at tablecmds.c:4533
#12 0x0000561cd93bb7a8 in ProcessUtilitySlow (pstate=0x561cf1d2f9e0, pstmt=0x561cf1d06390, queryString=0x561cf1d05570 "ALTER TABLE ONLY public.parent2\n    ADD CONSTRAINT parent2_pkey PRIMARY KEY (id);\n\n\n",
    context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0, dest=0x561cf1d06750, qc=0x7ffc0e561ba0) at utility.c:1321
#13 0x0000561cd93bb04e in standard_ProcessUtility (pstmt=0x561cf1d06390, queryString=0x561cf1d05570 "ALTER TABLE ONLY public.parent2\n    ADD CONSTRAINT parent2_pkey PRIMARY KEY (id);\n\n\n", readOnlyTree=false,
    context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0, dest=0x561cf1d06750, qc=0x7ffc0e561ba0) at utility.c:1070
#14 0x0000561cd93b9f4c in ProcessUtility (pstmt=0x561cf1d06390, queryString=0x561cf1d05570 "ALTER TABLE ONLY public.parent2\n    ADD CONSTRAINT parent2_pkey PRIMARY KEY (id);\n\n\n", readOnlyTree=false,
    context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0, dest=0x561cf1d06750, qc=0x7ffc0e561ba0) at utility.c:523
#15 0x0000561cd93b87a9 in PortalRunUtility (portal=0x561cf1d85d90, pstmt=0x561cf1d06390, isTopLevel=true, setHoldSnapshot=false, dest=0x561cf1d06750, qc=0x7ffc0e561ba0) at pquery.c:1185
#16 0x0000561cd93b8a52 in PortalRunMulti (portal=0x561cf1d85d90, isTopLevel=true, setHoldSnapshot=false, dest=0x561cf1d06750, altdest=0x561cf1d06750, qc=0x7ffc0e561ba0) at pquery.c:1349
#17 0x0000561cd93b7e73 in PortalRun (portal=0x561cf1d85d90, count=9223372036854775807, isTopLevel=true, dest=0x561cf1d06750, altdest=0x561cf1d06750, qc=0x7ffc0e561ba0) at pquery.c:820
#18 0x0000561cd93b037e in exec_simple_query (query_string=0x561cf1d05570 "ALTER TABLE ONLY public.parent2\n    ADD CONSTRAINT parent2_pkey PRIMARY KEY (id);\n\n\n") at postgres.c:1274
#19 0x0000561cd93b5b46 in PostgresMain (dbname=0x561cf1d3f580 "target", username=0x561cf1d3f568 "ubuntu") at postgres.c:4771
#20 0x0000561cd93abab7 in BackendMain (startup_data=0x7ffc0e561e50, startup_data_len=24) at backend_startup.c:124
#21 0x0000561cd92abf09 in postmaster_child_launch (child_type=B_BACKEND, child_slot=2, startup_data=0x7ffc0e561e50, startup_data_len=24, client_sock=0x7ffc0e561eb0) at launch_backend.c:290
#22 0x0000561cd92b2946 in BackendStartup (client_sock=0x7ffc0e561eb0) at postmaster.c:3580
#23 0x0000561cd92afeb1 in ServerLoop () at postmaster.c:1702
#24 0x0000561cd92af7a2 in PostmasterMain (argc=3, argv=0x561cf1cffb00) at postmaster.c:1400
#25 0x0000561cd914cf06 in main (argc=3, argv=0x561cf1cffb00) at main.c:227

gdb -p 2911180   // COPY public.c22 (id, ref, b) FROM stdin;
(gdb) bt
#0  0x00007f2f69148dea in epoll_wait (epfd=5, events=0x561cf1d003a8, maxevents=1, timeout=-1) at ../sysdeps/unix/sysv/linux/epoll_wait.c:30
#1  0x0000561cd938106f in WaitEventSetWaitBlock (set=0x561cf1d00340, cur_timeout=-1, occurred_events=0x7ffc0e561000, nevents=1) at waiteventset.c:1190
#2  0x0000561cd9380f74 in WaitEventSetWait (set=0x561cf1d00340, timeout=-1, occurred_events=0x7ffc0e561000, nevents=1, wait_event_info=50331648) at waiteventset.c:1138
#3  0x0000561cd936f6a5 in WaitLatch (latch=0x7f2f665629e4, wakeEvents=33, timeout=0, wait_event_info=50331648) at latch.c:194
#4  0x0000561cd939fdbc in ProcSleep (locallock=0x561cf1d63560) at proc.c:1454
#5  0x0000561cd938b2c6 in WaitOnLock (locallock=0x561cf1d63560, owner=0x561cf1d44040) at lock.c:1968
#6  0x0000561cd9389dbd in LockAcquireExtended (locktag=0x7ffc0e5613f0, lockmode=1, sessionLock=false, dontWait=false, reportMemoryError=true, locallockp=0x7ffc0e5613e8, logLockFailure=false) at lock.c:1217
#7  0x0000561cd93864bc in LockRelationOid (relid=16463, lockmode=1) at lmgr.c:115
#8  0x0000561cd8daf922 in relation_open (relationId=16463, lockmode=1) at relation.c:55
#9  0x0000561cd95a29f3 in generate_partition_qual (rel=0x7f2f5d7d6640) at partcache.c:362
#10 0x0000561cd95a28b8 in RelationGetPartitionQual (rel=0x7f2f5d7d6640) at partcache.c:283
#11 0x0000561cd90bd59f in ExecPartitionCheck (resultRelInfo=0x561cf1d2ead8, slot=0x561cf1d33af8, estate=0x561cf1dde450, emitError=true) at execMain.c:1952
#12 0x0000561cd8fd1c9f in CopyFrom (cstate=0x561cf1de23a8) at copyfrom.c:1368
#13 0x0000561cd8fccd30 in DoCopy (pstate=0x561cf1d2f9e0, stmt=0x561cf1d06140, stmt_location=0, stmt_len=39, processed=0x7ffc0e561830) at copy.c:306
#14 0x0000561cd93ba623 in standard_ProcessUtility (pstmt=0x561cf1d06210, queryString=0x561cf1d05570 "COPY public.c22 (id, ref, b) FROM stdin;\n", readOnlyTree=false, context=PROCESS_UTILITY_TOPLEVEL, params=0x0,
    queryEnv=0x0, dest=0x561cf1d065d0, qc=0x7ffc0e561ba0) at utility.c:738
#15 0x0000561cd93b9f4c in ProcessUtility (pstmt=0x561cf1d06210, queryString=0x561cf1d05570 "COPY public.c22 (id, ref, b) FROM stdin;\n", readOnlyTree=false, context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0,
    dest=0x561cf1d065d0, qc=0x7ffc0e561ba0) at utility.c:523
#16 0x0000561cd93b87a9 in PortalRunUtility (portal=0x561cf1d85d90, pstmt=0x561cf1d06210, isTopLevel=true, setHoldSnapshot=false, dest=0x561cf1d065d0, qc=0x7ffc0e561ba0) at pquery.c:1185
#17 0x0000561cd93b8a52 in PortalRunMulti (portal=0x561cf1d85d90, isTopLevel=true, setHoldSnapshot=false, dest=0x561cf1d065d0, altdest=0x561cf1d065d0, qc=0x7ffc0e561ba0) at pquery.c:1349
#18 0x0000561cd93b7e73 in PortalRun (portal=0x561cf1d85d90, count=9223372036854775807, isTopLevel=true, dest=0x561cf1d065d0, altdest=0x561cf1d065d0, qc=0x7ffc0e561ba0) at pquery.c:820
#19 0x0000561cd93b037e in exec_simple_query (query_string=0x561cf1d05570 "COPY public.c22 (id, ref, b) FROM stdin;\n") at postgres.c:1274
#20 0x0000561cd93b5b46 in PostgresMain (dbname=0x561cf1d3f580 "target", username=0x561cf1d3f568 "ubuntu") at postgres.c:4771
#21 0x0000561cd93abab7 in BackendMain (startup_data=0x7ffc0e561e50, startup_data_len=24) at backend_startup.c:124
#22 0x0000561cd92abf09 in postmaster_child_launch (child_type=B_BACKEND, child_slot=1, startup_data=0x7ffc0e561e50, startup_data_len=24, client_sock=0x7ffc0e561eb0) at launch_backend.c:290
#23 0x0000561cd92b2946 in BackendStartup (client_sock=0x7ffc0e561eb0) at postmaster.c:3580
#24 0x0000561cd92afeb1 in ServerLoop () at postmaster.c:1702
#25 0x0000561cd92af7a2 in PostmasterMain (argc=3, argv=0x561cf1cffb00) at postmaster.c:1400
#26 0x0000561cd914cf06 in main (argc=3, argv=0x561cf1cffb00) at main.c:227

The alter table session do check its children not-null constraint using lockmod=8, and the copy session do get partition_qual to lock parent using lockmode =1.
I wonder if we have to use the same lockmode for checking children's not-null constraint.


--
Thanks,
Tender Wang

pgsql-hackers by date:

Previous
From: Tom Lane
Date:
Subject: Recent pg_rewind test failures in buildfarm
Next
From: Xuneng Zhou
Date:
Subject: Re: Proposal: Limitations of palloc inside checkpointer