Re: Need help debugging SIGBUS crashes - Mailing list pgsql-hackers
| From | Peter 'PMc' Much |
|---|---|
| Subject | Re: Need help debugging SIGBUS crashes |
| Date | |
| Msg-id | ab7IuP9NApo1e8Nj@disp.intra.daemon.contact Whole thread Raw |
| In response to | Re: Need help debugging SIGBUS crashes (Tom Lane <tgl@sss.pgh.pa.us>) |
| List | pgsql-hackers |
On Tue, Mar 17, 2026 at 04:56:48PM -0400, Tom Lane wrote:
! "Peter 'PMc' Much" <pmc@citylink.dinoex.sub.org> writes:
! > On Tue, Mar 17, 2026 at 10:12:07AM -0400, Tom Lane wrote:
! > ! Why it was okay in older FreeBSD and not so much in v14, who knows?
!
! > Maybe it wasn't. Here it appeared out of thin air in February, while
! > the system was upgraded from 13.5 to 14.3 in July'25, and did run
! > without problems for these eight months.
! > So this is not directly or solely related to FBSD R.14, and while it
! > is more likely to happen during massive memory use, that is not a
! > stringent condition either. Neither did I find any other solid determining condition.
!
! Yeah, it seems likely that there is some additional triggering
! condition that we don't understand; otherwise there would be more
! people complaining than just you. But if updating to PG16 gets
! rid of the problem, I'm not sure it is worth the time to try to
! narrow down what that additional trigger is.
!
! Of course, if you still see the issue after upgrading, we'll have
! to dig harder.
Sadly, here it is again with PG r16.13, at the same place as before.
* thread #1, name = 'postgres', stop reason = signal SIGBUS
* frame #0: 0x000000082bba3159 libc.so.7`extent_arena_get [inlined] extent_arena_ind_get(extent=0x79f696918ed45a56)
at extent_inlines.h:40:23
frame #1: 0x000000082bba3159 libc.so.7`extent_arena_get(extent=0x79f696918ed45a56) at extent_inlines.h:49:23
frame #2: 0x000000082bba3a14 libc.so.7`extent_can_coalesce(arena=0x00003d43fd800980, extents=0x00003d43fd8058d8,
inner=0x00003d43fd90f080,outer=0x79f696918ed45a56) at jemalloc_extent.c:1565:6
frame #3: 0x000000082bba363b libc.so.7`extent_try_coalesce_impl(tsdn=0x00003d43fd67a090, arena=0x00003d43fd800980,
r_extent_hooks=0x0000000820af5198,rtree_ctx=0x00003d43fd67a0c0, extents=0x00003d43fd8058d8, extent=0x00003d43fd90f080,
coalesced=0x0000000000000000,growing_retained=true, inactive_only=false) at jemalloc_extent.c:1628:24
frame #4: 0x000000082bba3448 libc.so.7`extent_try_coalesce(tsdn=0x00003d43fd67a090, arena=0x00003d43fd800980,
r_extent_hooks=0x0000000820af5198,rtree_ctx=0x00003d43fd67a0c0, extents=0x00003d43fd8058d8, extent=0x00003d43fd90f080,
coalesced=0x0000000000000000,growing_retained=true) at jemalloc_extent.c:1680:9
frame #5: 0x000000082bba055f libc.so.7`extent_record(tsdn=0x00003d43fd67a090, arena=0x00003d43fd800980,
r_extent_hooks=0x0000000820af5198,extents=0x00003d43fd8058d8, extent=0x00003d43fd90f080, growing_retained=true) at
jemalloc_extent.c:1719:12
frame #6: 0x000000082bba6043 libc.so.7`extent_grow_retained(tsdn=0x00003d43fd67a090, arena=0x00003d43fd800980,
r_extent_hooks=0x0000000820af5198,size=65536, pad=4096, alignment=64, slab=false, szind=44, zero=0x0000000820af51ef,
commit=0x0000000820af5197) at jemalloc_extent.c:1385:4
frame #7: 0x000000082bba0f3f libc.so.7`extent_alloc_retained(tsdn=0x00003d43fd67a090, arena=0x00003d43fd800980,
r_extent_hooks=0x0000000820af5198,new_addr=0x0000000000000000, size=65536, pad=4096, alignment=64, slab=false,
szind=44,zero=0x0000000820af51ef, commit=0x0000000820af5197) at jemalloc_extent.c:1482:12
frame #8: 0x000000082bba0d39 libc.so.7`__je_extent_alloc_wrapper(tsdn=0x00003d43fd67a090, arena=0x00003d43fd800980,
r_extent_hooks=0x0000000820af5198,new_addr=0x0000000000000000, size=65536, pad=4096, alignment=64, slab=false,
szind=44,zero=0x0000000820af51ef, commit=0x0000000820af5197) at jemalloc_extent.c:1541:21
frame #9: 0x000000082bb7a87d libc.so.7`__je_arena_extent_alloc_large(tsdn=<unavailable>, arena=0x00003d43fd800980,
usize=65536,alignment=<unavailable>, zero=0x0000000820af51ef) at jemalloc_arena.c:448:12
frame #10: 0x000000082bba77b0 libc.so.7`__je_large_palloc(tsdn=0x00003d43fd67a090, arena=<unavailable>,
usize=<unavailable>,alignment=64, zero=<unavailable>) at jemalloc_large.c:47:43
frame #11: 0x000000082bba7612 libc.so.7`__je_large_malloc(tsdn=<unavailable>, arena=<unavailable>,
usize=<unavailable>,zero=<unavailable>) at jemalloc_large.c:17:9 [artificial]
frame #12: 0x000000082bb7c477 libc.so.7`__je_arena_malloc_hard(tsdn=<unavailable>, arena=<unavailable>,
size=<unavailable>,ind=<unavailable>, zero=<unavailable>) at jemalloc_arena.c:1528:9 [artificial]
frame #13: 0x000000082bb6f5a7 libc.so.7`__je_malloc_default [inlined] arena_malloc(tsdn=0x00003d43fd67a090,
arena=0x0000000000000000,size=<unavailable>, ind=<unavailable>, zero=false, tcache=0x00003d43fd67a280, slow_path=false)
at arena_inlines_b.h:176:9
frame #14: 0x000000082bb6f598 libc.so.7`__je_malloc_default [inlined] iallocztm(tsdn=0x00003d43fd67a090,
size=<unavailable>,ind=<unavailable>, zero=false, tcache=0x00003d43fd67a280, is_internal=false,
arena=0x0000000000000000,slow_path=false) at jemalloc_internal_inlines_c.h:53:8
frame #15: 0x000000082bb6f598 libc.so.7`__je_malloc_default [inlined] imalloc_no_sample(sopts=<unavailable>,
dopts=<unavailable>,tsd=0x00003d43fd67a090, size=<unavailable>, usize=65536, ind=<unavailable>) at
jemalloc_jemalloc.c:1953:9
frame #16: 0x000000082bb6f598 libc.so.7`__je_malloc_default [inlined] imalloc_body(sopts=<unavailable>,
dopts=<unavailable>,tsd=0x00003d43fd67a090) at jemalloc_jemalloc.c:2153:16
frame #17: 0x000000082bb6f598 libc.so.7`__je_malloc_default [inlined] imalloc(sopts=<unavailable>,
dopts=<unavailable>) at jemalloc_jemalloc.c:2262:10
frame #18: 0x000000082bb6f4ca libc.so.7`__je_malloc_default(size=<unavailable>) at jemalloc_jemalloc.c:2293:2
frame #19: 0x000000082bb6fa2d libc.so.7`__malloc(size=<unavailable>) at jemalloc_jemalloc.c:0 [artificial]
frame #20: 0x000000082bad08a4 libc.so.7`_dns_gethostbyaddr(rval=0x0000000820af5a90, cb_data=<unavailable>,
ap=<unavailable>) at gethostbydns.c:619:13
frame #21: 0x000000082badeab2 libc.so.7`_nsdispatch(retval=0x0000000820af5a90, disp_tab=0x000000082bbd8800,
database="",method_name="", defaults=<unavailable>) at nsdispatch.c:726:14
frame #22: 0x000000082bad2be8 libc.so.7`gethostbyaddr_r(addr=0x0000000820af5ae0, len=<unavailable>,
af=<unavailable>,hp=0x000000082bbebda0, buf="", buflen=8800, result=0x0000000820af5a90, h_errnop=0x0000000820af5a8c) at
gethostnamadr.c:650:9
frame #23: 0x000000082bad34f9 libc.so.7`gethostbyaddr(addr=0x0000000820af5ae0, len=16, af=28) at
gethostnamadr.c:700:6
frame #24: 0x000000082baddcd8 libc.so.7`getipnodebyaddr(src=0x0000000820af5ae0, len=<unavailable>, af=28,
errp=0x0000000820af5b50) at name6.c:378:7
frame #25: 0x000000082bad4242 libc.so.7`getnameinfo_inet(afd=0x000000082bbd8980, sa=0x00003d43fda5e098,
salen=<unavailable>,host=<unavailable>, hostlen=<unavailable>, serv=<unavailable>, servlen=0, flags=4) at
getnameinfo.c:311:8
frame #26: 0x000000082bad405d libc.so.7`getnameinfo(sa=<unavailable>, salen=<unavailable>, host=<unavailable>,
hostlen=<unavailable>,serv=<unavailable>, servlen=<unavailable>, flags=4) at getnameinfo.c:157:10
frame #27: 0x0000000000a85081 postgres`pg_getnameinfo_all + 177
frame #28: 0x0000000000774262 postgres`hba_getauthmethod + 1202
frame #29: 0x000000000076a412 postgres`ClientAuthentication + 50
frame #30: 0x0000000000a49fd1 postgres`InitPostgres + 2273
frame #31: 0x00000000008eac4d postgres`PostgresMain + 285
frame #32: 0x0000000000857108 postgres`BackendRun + 40
frame #33: 0x0000000000855a1a postgres`ServerLoop + 7866
frame #34: 0x000000000085300e postgres`PostmasterMain + 3278
frame #35: 0x000000000077bac3 postgres`main + 803
frame #36: 0x000000082ba72edc libc.so.7`__libc_start1(argc=4, argv=0x0000000820af8700, env=0x0000000820af8728,
cleanup=<unavailable>,mainX=(postgres`main)) at libc_start1.c:180:7
frame #37: 0x0000000000556de4 postgres`_start + 36
This is frame #3, and 'extent_t *next' does not seem to point to an
extent_t:
1601 static extent_t *
1602 extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena,
1603 extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents,
1604 extent_t *extent, bool *coalesced, bool growing_retained,
1605 bool inactive_only) {
1606 /*
1607 * We avoid checking / locking inactive neighbors for large size
1608 * classes, since they are eagerly coalesced on deallocation which can
1609 * cause lock contention.
1610 */
1611 /*
1612 * Continue attempting to coalesce until failure, to protect against
1613 * races with other threads that are thwarted by this one.
1614 */
1615 bool again;
1616 do {
1617 again = false;
1618
1619 /* Try to coalesce forward. */
1620 extent_t *next = extent_lock_from_addr(tsdn, rtree_ctx,
1621 extent_past_get(extent), inactive_only);
1622 if (next != NULL) {
1623 /*
1624 * extents->mtx only protects against races for
1625 * like-state extents, so call extent_can_coalesce()
1626 * before releasing next's pool lock.
1627 */
1628 bool can_coalesce = extent_can_coalesce(arena, extents,
1629 extent, next);
(lldb) p next
(extent_t *) 0x79f696918ed45a56
(lldb) p *next
error: Couldn't apply expression side effects : Couldn't dematerialize a result variable: couldn't read its memory
(lldb) p extent
(extent_t *) 0x00003d43fd90f080
(lldb) p *extent
(extent_t) {
e_bits = 8796153896960
e_addr = 0x00003d43fe211000
= (e_size_esn = 2551808, e_bsize = 2551808)
ql_link = {
qre_next = 0x00003d43fd90f080
qre_prev = 0x00003d43fd90f080
}
ph_link = {
phn_prev = NULL
phn_next = NULL
phn_lchild = NULL
}
= {
e_slab_data = {
bitmap = ([0] = 0, [1] = 0, [2] = 0, [3] = 0, [4] = 0, [5] = 0, [6] = 0, [7] = 0)
}
= {
e_alloc_time = (ns = 0)
e_prof_tctx = (repr = 0x0000000000000000)
}
}
}
pgsql-hackers by date: