Re: BUG #17255: Server crashes in index_delete_sort_cmp() due to race condition with vacuum - Mailing list pgsql-bugs

From Dmitry Dolgov
Subject Re: BUG #17255: Server crashes in index_delete_sort_cmp() due to race condition with vacuum
Date
Msg-id 20211031192051.e3r2uxft3bulymrr@localhost
Whole thread Raw
In response to Re: BUG #17255: Server crashes in index_delete_sort_cmp() due to race condition with vacuum  (Andres Freund <andres@anarazel.de>)
Responses Re: BUG #17255: Server crashes in index_delete_sort_cmp() due to race condition with vacuum
List pgsql-bugs
> On Sat, Oct 30, 2021 at 02:39:48PM -0700, Andres Freund wrote:
> > In my case it crashed on pg_unreachable (which is an abort, when asserts
> > are enabled) inside index_delete_sort_cmp. It seems that both item pointers
> > being compared have the same block and offset number. In view of
> > the recent discussions I was thinking it could be somehow related to the
> > issues with duplicated TIDs, but delstate->deltids doesn't in fact have
> > any duplicated entries -- so not sure about that, still investigating
> > the core dump.
>
> I suspect this is the same bug as #17245. Could you check if it's fixed by
> https://www.postgresql.org/message-id/CAH2-WzkN5aESSLfK7-yrYgsXxYUi__VzG4XpZFwXm98LUtoWuQ%40mail.gmail.com
>
> The crash is somewhere in pg_class, which is also manually VACUUMed by the
> test, which could trigger the issue we found in the other thread. The likely
> reason the loop in the repro is needed is that that'll push one of the indexes
> on pg_class over the 512kb/min_parallel_index_scan_size boundary to start
> using parallel vacuum.

I've applied both patches from Peter: the fix itself and the
index-points-to-LP_UNUSED-item assertions. Now it doesn't crash on
pg_unreachable, but it hits one of the extra assertions added by the second patch:

    #0  0x00007f251875f2fb in raise () from /lib64/libc.so.6
    #1  0x00007f2518748ef6 in abort () from /lib64/libc.so.6
    #2  0x000056387b62a4c7 in ExceptionalCondition (conditionName=0x56387b6be622 "ItemIdIsUsed(iid)",
errorType=0x56387b6bc849 "FailedAssertion", fileName=0x56387b6bc928 "heapam.c", lineNumber=7467) at assert.c:69
 
    #3  0x000056387afb4ba9 in heap_index_delete_tuples (rel=0x7f25195f8e20, delstate=0x7ffe817bdf00) at heapam.c:7467
    #4  0x000056387afe4a38 in table_index_delete_tuples (rel=0x7f25195f8e20, delstate=0x7ffe817bdf00) at
../../../../src/include/access/tableam.h:1327
    #5  0x000056387afe83b7 in _bt_delitems_delete_check (rel=0x7f2519601880, buf=182, heapRel=0x7f25195f8e20,
delstate=0x7ffe817bdf00)at nbtpage.c:1541
 
    #6  0x000056387afe4452 in _bt_simpledel_pass (rel=0x7f2519601880, buffer=182, heapRel=0x7f25195f8e20,
deletable=0x7ffe817bdfb0,ndeletable=55, newitem=0x56387c05cfe0, minoff=1, maxoff=271) at nbtinsert.c:2896
 
    #7  0x000056387afe3cb1 in _bt_delete_or_dedup_one_page (rel=0x7f2519601880, heapRel=0x7f25195f8e20,
insertstate=0x7ffe817be3b0,simpleonly=false, checkingunique=true, uniquedup=false, indexUnchanged=false) at
nbtinsert.c:2709
    #8  0x000056387afdea4b in _bt_findinsertloc (rel=0x7f2519601880, insertstate=0x7ffe817be3b0, checkingunique=true,
indexUnchanged=false,stack=0x56387c05d008, heapRel=0x7f25195f8e20) at nbtinsert.c:901
 
    #9  0x000056387afdd3c7 in _bt_doinsert (rel=0x7f2519601880, itup=0x56387c05cfe0, checkUnique=UNIQUE_CHECK_YES,
indexUnchanged=false,heapRel=0x7f25195f8e20) at nbtinsert.c:255
 
    #10 0x000056387afecfee in btinsert (rel=0x7f2519601880, values=0x7ffe817be510, isnull=0x7ffe817be4f0,
ht_ctid=0x56387c05b994,heapRel=0x7f25195f8e20, checkUnique=UNIQUE_CHECK_YES, indexUnchanged=false,
indexInfo=0x56387c05cec8)at nbtree.c:199
 
    #11 0x000056387afd7f05 in index_insert (indexRelation=0x7f2519601880, values=0x7ffe817be510, isnull=0x7ffe817be4f0,
heap_t_ctid=0x56387c05b994,heapRelation=0x7f25195f8e20, checkUnique=UNIQUE_CHECK_YES, indexUnchanged=false,
indexInfo=0x56387c05cec8)at indexam.c:193
 
    #12 0x000056387b08c396 in CatalogIndexInsert (indstate=0x56387bfc0388, heapTuple=0x56387c05b990) at indexing.c:158
    #13 0x000056387b08c51a in CatalogTupleInsert (heapRel=0x7f25195f8e20, tup=0x56387c05b990) at indexing.c:231
    #14 0x000056387b07ed40 in InsertPgClassTuple (pg_class_desc=0x7f25195f8e20, new_rel_desc=0x7f251960fa18,
new_rel_oid=957915,relacl=0, reloptions=0) at heap.c:984
 
    #15 0x000056387b07eec6 in AddNewRelationTuple (pg_class_desc=0x7f25195f8e20, new_rel_desc=0x7f251960fa18,
new_rel_oid=957915,new_type_oid=957917, reloftype=0, relowner=10, relkind=114 'r', relfrozenxid=412531, relminmxid=1,
relacl=0,reloptions=0) at heap.c:1056
 
    #16 0x000056387b07f60d in heap_create_with_catalog (relname=0x7ffe817bec60 "tmp", relnamespace=16686,
reltablespace=0,relid=957915, reltypeid=0, reloftypeid=0, ownerid=10, accessmtd=2, tupdesc=0x56387bfbb6b0,
cooked_constraints=0x0,relkind=114 'r', relpersistence=116 't', shared_relation=false, mapped_relation=false,
oncommit=ONCOMMIT_NOOP,reloptions=0, use_user_acl=true, allow_system_table_mods=false, is_internal=false, relrewrite=0,
typaddress=0x0)at heap.c:1409
 
    #17 0x000056387b1a9bb7 in DefineRelation (stmt=0x56387bf98620, relkind=114 'r', ownerId=10, typaddress=0x0,
queryString=0x56387bf97810"CREATE TEMPORARY TABLE tmp (a int PRIMARY KEY);") at tablecmds.c:933
 
    #18 0x000056387b47fde1 in ProcessUtilitySlow (pstate=0x56387bfb9890, pstmt=0x56387bf989d0,
queryString=0x56387bf97810"CREATE TEMPORARY TABLE tmp (a int PRIMARY KEY);", context=PROCESS_UTILITY_TOPLEVEL,
params=0x0,queryEnv=0x0, dest=0x56387bf98ac0, qc=0x7ffe817bf4a0) at utility.c:1163
 
    #19 0x000056387b47fb3d in standard_ProcessUtility (pstmt=0x56387bf989d0, queryString=0x56387bf97810 "CREATE
TEMPORARY TABLE tmp (a int PRIMARY KEY);", readOnlyTree=false, context=PROCESS_UTILITY_TOPLEVEL, params=0x0,
queryEnv=0x0, dest=0x56387bf98ac0, qc=0x7ffe817bf4a0) at utility.c:1066
 
    #20 0x000056387b47eb01 in ProcessUtility (pstmt=0x56387bf989d0, queryString=0x56387bf97810 "CREATE TEMPORARY TABLE
tmp (a int PRIMARY KEY);", readOnlyTree=false, context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0,
dest=0x56387bf98ac0, qc=0x7ffe817bf4a0) at utility.c:527
 
    #21 0x000056387b47d5e8 in PortalRunUtility (portal=0x56387bffb860, pstmt=0x56387bf989d0, isTopLevel=true,
setHoldSnapshot=false,dest=0x56387bf98ac0, qc=0x7ffe817bf4a0) at pquery.c:1155
 
    #22 0x000056387b47d858 in PortalRunMulti (portal=0x56387bffb860, isTopLevel=true, setHoldSnapshot=false,
dest=0x56387bf98ac0,altdest=0x56387bf98ac0, qc=0x7ffe817bf4a0) at pquery.c:1312
 
    #23 0x000056387b47ccdb in PortalRun (portal=0x56387bffb860, count=9223372036854775807, isTopLevel=true,
run_once=true,dest=0x56387bf98ac0, altdest=0x56387bf98ac0, qc=0x7ffe817bf4a0) at pquery.c:788
 
    #24 0x000056387b475fad in exec_simple_query (query_string=0x56387bf97810 "CREATE TEMPORARY TABLE tmp (a int PRIMARY
KEY);") at postgres.c:1214
 
    #25 0x000056387b47ab2b in PostgresMain (dbname=0x56387bfc3748 "regression", username=0x56387bfc3728 "erthalion") at
postgres.c:4497
    #26 0x000056387b3a636f in BackendRun (port=0x56387bfbb460) at postmaster.c:4560
    #27 0x000056387b3a5c60 in BackendStartup (port=0x56387bfbb460) at postmaster.c:4288
    #28 0x000056387b3a1e81 in ServerLoop () at postmaster.c:1801
    #29 0x000056387b3a1630 in PostmasterMain (argc=3, argv=0x56387bf91ca0) at postmaster.c:1473
    #30 0x000056387b297f62 in main (argc=3, argv=0x56387bf91ca0) at main.c:198

The ItemId in question:

    >>> p *iid
    $2 = {
      lp_off = 0,
      lp_flags = 0,
      lp_len = 0
    }



pgsql-bugs by date:

Previous
From: Andres Freund
Date:
Subject: Re: CREATE INDEX CONCURRENTLY does not index prepared xact's data
Next
From: Tom Lane
Date:
Subject: Re: BUG #17254: Crash with 0xC0000409 in pg_stat_statements when pg_stat_tmp\pgss_query_texts.stat exceeded 2GB.