ERROR: could not resize shared memory segment...No space left on device - Mailing list pgsql-hackers
From | Justin Pryzby |
---|---|
Subject | ERROR: could not resize shared memory segment...No space left on device |
Date | |
Msg-id | 20191216184906.GA2082@telsasoft.com Whole thread Raw |
Responses |
Re: ERROR: could not resize shared memory segment...No space left on device
|
List | pgsql-hackers |
A customer's report query hit this error. ERROR: could not resize shared memory segment "/PostgreSQL.2011322019" to 134217728 bytes: No space left on device I found: https://www.postgresql.org/message-id/flat/CAEepm%3D2D_JGb8X%3DLa-0PX9C8dBX9%3Dj9wY%2By1-zDWkcJu0%3DBQbA%40mail.gmail.com work_mem | 128MB dynamic_shared_memory_type | posix version | PostgreSQL 12.1 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-23), 64-bit Running centos 6.9 / linux 2.6.32-696.23.1.el6.x86_64 $ free -m total used free shared buffers cached Mem: 7871 7223 648 1531 5 1988 -/+ buffers/cache: 5229 2642 Swap: 4095 2088 2007 $ mount | grep /dev/shm tmpfs on /dev/shm type tmpfs (rw) $ du -hs /dev/shm 0 /dev/shm $ df /dev/shm Filesystem 1K-blocks Used Available Use% Mounted on tmpfs 4030272 24 4030248 1% /dev/shm Later, I see: $ df -h /dev/shm Filesystem Size Used Avail Use% Mounted on tmpfs 3.9G 3.3G 601M 85% /dev/shm I can reproduce the error running a single instance of the query. The query plan is 1300 lines long, and involves 482 "Scan" nodes on a table which currently has 93 partitions, and for which current partitions are "daily". I believe I repartitioned its history earlier this year to "monthly", probably to avoid "OOM with many sorts", as reported here: https://www.postgresql.org/message-id/20190708164401.GA22387%40telsasoft.com $ grep Scan tmp/sql-`date +%F`.1.ex |sed 's/^ *//; s/ on .*//' |sort |uniq -c |sort -nr 227 -> Parallel Bitmap Heap Scan 227 -> Bitmap Index Scan 14 -> Parallel Seq Scan 9 -> Seq Scan 2 -> Subquery Scan 2 -> Index Scan using sites_pkey 1 Subquery Scan There are total of 10 "Workers Planned": grep -o 'Worker.*' tmp/sql-`date +%F`.1.ex Workers Planned: 2 Workers Planned: 2 Workers Planned: 2 Workers Planned: 2 Workers Planned: 2 I will plan to repartition again to month granularity unless someone wants to collect further information or suggest a better solution. 
(gdb) bt #0 pg_re_throw () at elog.c:1717 #1 0x0000000000886194 in errfinish (dummy=<value optimized out>) at elog.c:464 #2 0x0000000000749453 in dsm_impl_posix (op=<value optimized out>, handle=<value optimized out>, request_size=<value optimized out>, impl_private=<value optimized out>, mapped_address=<value optimized out>, mapped_size=<value optimized out>, elevel=20) at dsm_impl.c:283 #3 dsm_impl_op (op=<value optimized out>, handle=<value optimized out>, request_size=<value optimized out>, impl_private=<value optimized out>, mapped_address=<value optimized out>, mapped_size=<value optimized out>, elevel=20) at dsm_impl.c:170 #4 0x000000000074a7c8 in dsm_create (size=100868096, flags=0) at dsm.c:459 #5 0x00000000008a94a6 in make_new_segment (area=0x1d70208, requested_pages=<value optimized out>) at dsa.c:2156 #6 0x00000000008aa47a in dsa_allocate_extended (area=0x1d70208, size=100663304, flags=5) at dsa.c:712 #7 0x0000000000670b3f in pagetable_allocate (pagetable=<value optimized out>, size=<value optimized out>) at tidbitmap.c:1511 #8 0x000000000067200c in pagetable_grow (tbm=0x7f82274da8e8, pageno=906296) at ../../../src/include/lib/simplehash.h:405 #9 pagetable_insert (tbm=0x7f82274da8e8, pageno=906296) at ../../../src/include/lib/simplehash.h:530 #10 tbm_get_pageentry (tbm=0x7f82274da8e8, pageno=906296) at tidbitmap.c:1225 #11 0x00000000006724a0 in tbm_add_tuples (tbm=0x7f82274da8e8, tids=<value optimized out>, ntids=1, recheck=false) at tidbitmap.c:405 #12 0x00000000004d7f1f in btgetbitmap (scan=0x1d7d948, tbm=0x7f82274da8e8) at nbtree.c:334 #13 0x00000000004d103a in index_getbitmap (scan=0x1d7d948, bitmap=<value optimized out>) at indexam.c:665 #14 0x00000000006323d8 in MultiExecBitmapIndexScan (node=0x1dcbdb8) at nodeBitmapIndexscan.c:105 #15 0x00000000006317f4 in BitmapHeapNext (node=0x1d8a030) at nodeBitmapHeapscan.c:141 #16 0x000000000062405c in ExecScanFetch (node=0x1d8a030, accessMtd=0x6316d0 <BitmapHeapNext>, recheckMtd=0x631440 
<BitmapHeapRecheck>) at execScan.c:133 #17 ExecScan (node=0x1d8a030, accessMtd=0x6316d0 <BitmapHeapNext>, recheckMtd=0x631440 <BitmapHeapRecheck>) at execScan.c:200 #18 0x0000000000622900 in ExecProcNodeInstr (node=0x1d8a030) at execProcnode.c:461 #19 0x000000000062c66f in ExecProcNode (pstate=0x1d7ad70) at ../../../src/include/executor/executor.h:239 #20 ExecAppend (pstate=0x1d7ad70) at nodeAppend.c:292 #21 0x0000000000622900 in ExecProcNodeInstr (node=0x1d7ad70) at execProcnode.c:461 #22 0x0000000000637da2 in ExecProcNode (pstate=0x1d7a630) at ../../../src/include/executor/executor.h:239 #23 ExecHashJoinOuterGetTuple (pstate=0x1d7a630) at nodeHashjoin.c:833 #24 ExecHashJoinImpl (pstate=0x1d7a630) at nodeHashjoin.c:356 #25 ExecHashJoin (pstate=0x1d7a630) at nodeHashjoin.c:572 #26 0x0000000000622900 in ExecProcNodeInstr (node=0x1d7a630) at execProcnode.c:461 #27 0x0000000000637da2 in ExecProcNode (pstate=0x1d7bff0) at ../../../src/include/executor/executor.h:239 #28 ExecHashJoinOuterGetTuple (pstate=0x1d7bff0) at nodeHashjoin.c:833 #29 ExecHashJoinImpl (pstate=0x1d7bff0) at nodeHashjoin.c:356 #30 ExecHashJoin (pstate=0x1d7bff0) at nodeHashjoin.c:572 #31 0x0000000000622900 in ExecProcNodeInstr (node=0x1d7bff0) at execProcnode.c:461 #32 0x000000000061eac7 in ExecProcNode (queryDesc=0x7f8228b72198, direction=<value optimized out>, count=0, execute_once=240) at ../../../src/include/executor/executor.h:239 #33 ExecutePlan (queryDesc=0x7f8228b72198, direction=<value optimized out>, count=0, execute_once=240) at execMain.c:1646 #34 standard_ExecutorRun (queryDesc=0x7f8228b72198, direction=<value optimized out>, count=0, execute_once=240) at execMain.c:364 #35 0x00007f8229aa7878 in pgss_ExecutorRun (queryDesc=0x7f8228b72198, direction=ForwardScanDirection, count=0, execute_once=true) at pg_stat_statements.c:893 #36 0x00007f8228f8d9ad in explain_ExecutorRun (queryDesc=0x7f8228b72198, direction=ForwardScanDirection, count=0, execute_once=true) at auto_explain.c:320 #37 
0x000000000061f0ce in ParallelQueryMain (seg=0x1c8d3b8, toc=0x7f82291f1000) at execParallel.c:1399 #38 0x00000000004f7daf in ParallelWorkerMain (main_arg=<value optimized out>) at parallel.c:1431 #39 0x00000000006eb2e0 in StartBackgroundWorker () at bgworker.c:834 #40 0x00000000006f52ac in do_start_bgworker () at postmaster.c:5770 #41 maybe_start_bgworkers () at postmaster.c:5996 #42 0x00000000006f867d in sigusr1_handler ( postgres_signal_arg=<value optimized out>) at postmaster.c:5167 #43 <signal handler called> #44 0x0000003049ae1603 in __select_nocancel () from /lib64/libc.so.6 #45 0x00000000006f9d43 in ServerLoop (argc=<value optimized out>, argv=<value optimized out>) at postmaster.c:1668 #46 PostmasterMain (argc=<value optimized out>, argv=<value optimized out>) at postmaster.c:1377 #47 0x000000000066a6b0 in main (argc=3, argv=0x1c5b950) at main.c:228 bt f: #2 0x0000000000749453 in dsm_impl_posix (op=<value optimized out>, handle=<value optimized out>, request_size=<value optimizedout>, impl_private=<value optimized out>, mapped_address=<value optimized out>, mapped_size=<value optimized out>,elevel=20) at dsm_impl.c:283 save_errno = <value optimized out> st = {st_dev = 26, st_ino = 0, st_nlink = 33554432, st_mode = 4096, st_uid = 0, st_gid = 65536, __pad0 = 0, st_rdev= 8975118, st_size = 496105863, st_blksize = 8974012, st_blocks = 140199374652304, st_atim = {tv_sec = 7593264, tv_nsec= 20}, st_mtim = {tv_sec = 85899345920, tv_nsec = 140198902917632}, st_ctim = {tv_sec = 44417024, tv_nsec = 289}, __unused = {7593264, 0, 140724603453440}} flags = <value optimized out> fd = <value optimized out> name = "/PostgreSQL.1648263397\000\000\024\242m\371\000\000\000\000\000\061w3\202\177\000\000\000\000\000\001\000\000\000\000pe\363\001\000\000\000\000\360\271\305\001\000\000\000" address = <value optimized out> Thanks, Justin
pgsql-hackers by date: