Thread: Re: backend crash on DELETE, reproducible locally
> I am running this delete statement via psql
>
> DELETE FROM ref.auto_hint WHERE title = 'Kontraindikation: ACE/Sartan <-> Schwangerschaft';
>
> and it crashes the backend it is running on.

For what it is worth, I have identified the physical file:

	gnumed_v21=# select pg_relation_filepath('ref.auto_hint');
	 pg_relation_filepath
	----------------------
	 base/139580/111088
	(1 row)

and physically copied that to another place (so there's no read error on the disk - but the DELETE would perhaps ask for a physical write test ?).

Attached the output of pg_filedump which, to me, doesn't seem to show anything out of the ordinary either.

A corrupt index should not be the case because the database is REINDEXed during the upgrade process, before the failing statement.

Will attempt to get a stack trace ...

Thanks,
Karsten
--
GPG 40BE 5B0E C98E 1713 AFA6 5BC0 3BEA AC80 7D4F C89B
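(A note on reproducing this: the strace and gdb sessions below need the PID of the exact backend that runs the statement. A minimal way to get it, assuming a single interactive psql session, is to ask the backend itself and then attach with strace -p or gdb -p; the precise invocation used here is not shown in the thread.)

	-- run in the same psql session that will issue the crashing DELETE;
	-- the returned PID is the server process to attach strace/gdb to
	select pg_backend_pid();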
On Thu, Nov 01, 2018 at 03:42:57PM +0100, Karsten Hilbert wrote:

> Will attempt to get a stack trace ...

Meanwhile, in case it helps, an strace of the crash.

epoll_wait(7, [{EPOLLIN, {u32=47607120, u64=35184419695952}}], 1, -1) = 1
recv(13, "Q\0\0\0`DELETE FROM ref.auto_hint W"..., 8192, 0) = 97
gettimeofday({tv_sec=1541084179, tv_usec=197395}, NULL) = 0
gettimeofday({tv_sec=1541084179, tv_usec=197442}, NULL) = 0
write(2, "2018-11-01 14:56:19 GMT LOG: 00"..., 209) = 209
_llseek(45, 0, [16384], SEEK_END) = 0
_llseek(47, 0, [16384], SEEK_END) = 0
_llseek(48, 0, [16384], SEEK_END) = 0
_llseek(49, 0, [16384], SEEK_END) = 0
_llseek(45, 0, [16384], SEEK_END) = 0
kill(10377, SIGUSR1) = 0
_llseek(45, 0, [0], SEEK_SET) = 0
read(45, "\0\0\0\0\234\270\246F\10\373\0\0D\0x\n\0 \4 I\337\1\0\0\236\0\4\0\200\1\0"..., 8192) = 8192
_llseek(43, 81920, [81920], SEEK_SET) = 0
read(43, "\0\0\0\0x{pF8o\0\0\264\1\34\33\360\37\4 \0\0\0\0\344\237\30\0\330\237\30\0"..., 8192) = 8192
_llseek(44, 925696, [925696], SEEK_SET) = 0
read(44, "\0\0\0\0\310\324\231FE\231\1\0L\0l\1\0 \4 \0\0\0\0\24\234\326\7<\231\254\5"..., 8192) = 8192
openat(AT_FDCWD, "base/139580/139695", O_RDWR|O_LARGEFILE) = 56
read(56, "\0\0\0\0\350\347\220E`B\0\0<\0\360\37\360\37\4 \0\0\0\0b1\5\0\3\0\0\0"..., 8192) = 8192
read(56, "\0\0\0\0h\347\220E\1,\0\0(\0\300\37\360\37\4 \0\0\0\0\344\237\30\0\330\237\30\0"..., 8192) = 8192
openat(AT_FDCWD, "base/139580/2612", O_RDWR|O_LARGEFILE) = 57
read(57, "\0\0\0\0\200\334s\1a\6\4\0(\0@\36\0 \4 \0\0\0\0\220\237\340\0 \237\340\0"..., 8192) = 8192
_llseek(43, 65536, [65536], SEEK_SET) = 0
read(43, "\0\0\0\0\214e^F\215w\0\0H\7`\n\360\37\4 \0\0\0\0x\212\30\0\344\237\30\0"..., 8192) = 8192
_llseek(44, 606208, [606208], SEEK_SET) = 0
read(44, "\0\0\0\0\20\206\237FVP\1\0p\0\230\3\0 \4 \361\360\1\0000\237\232\1T\236\266\1"..., 8192) = 8192
stat64("/usr/lib/postgresql/11/lib/plpgsql", 0xbfbda4ec) = -1 ENOENT (No such file or directory)
stat64("/usr/lib/postgresql/11/lib/plpgsql.so", {st_mode=S_IFREG|0644, st_size=197944, ...}) = 0
stat64("/usr/lib/postgresql/11/lib/plpgsql.so", {st_mode=S_IFREG|0644, st_size=197944, ...}) = 0
futex(0xb78b904c, FUTEX_WAKE_PRIVATE, 2147483647) = 0
openat(AT_FDCWD, "/usr/lib/postgresql/11/lib/plpgsql.so", O_RDONLY|O_CLOEXEC) = 58
read(58, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\300a\0\0004\0\0\0"..., 512) = 512
fstat64(58, {st_mode=S_IFREG|0644, st_size=197944, ...}) = 0
mmap2(NULL, 201108, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 58, 0) = 0xb293f000
mmap2(0xb2944000, 102400, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 58, 0x5000) = 0xb2944000
mmap2(0xb295d000, 69632, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 58, 0x1e000) = 0xb295d000
mmap2(0xb296e000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 58, 0x2e000) = 0xb296e000
close(58) = 0
mprotect(0xb296e000, 8192, PROT_READ) = 0
openat(AT_FDCWD, "global/139651", O_RDWR|O_LARGEFILE) = 58
read(58, "\0\0\0\0\320\200zE\324\16\0\0<\0\360\37\360\37\4 \0\0\0\0b1\5\0\3\0\0\0"..., 8192) = 8192
read(58, "\0\0\0\0P\200zE.+\0\0\320\0\310\35\360\37\4 \0\0\0\0\344\237\30\0\330\237\30\0"..., 8192) = 8192
openat(AT_FDCWD, "base/139580/139719", O_RDWR|O_LARGEFILE) = 59
read(59, "\0\0\0\0x\347\232E\226s\0\0<\0\360\37\360\37\4 \0\0\0\0b1\5\0\3\0\0\0"..., 8192) = 8192
read(59, "\0\0\0\0l\250\16F\22\340\0\0h\0\0\37\360\37\4 \0\0\0\0\344\237\30\0\330\237\30\0"..., 8192) = 8192
read(42, "\0\0\0\0`\235^\1\177\242\4\0\320\0\30\1\0 \4 \0\0\0\0T\237X\1\250\236X\1"..., 8192) = 8192
openat(AT_FDCWD, "base/139580/139606", O_RDWR|O_LARGEFILE) = 60
read(60, "\0\0\0\0\244E{F\206q\0\0<\0\360\37\360\37\4 \0\0\0\0b1\5\0\3\0\0\0"..., 8192) = 8192
_llseek(60, 24576, [24576], SEEK_SET) = 0
read(60, "\0\0\0\0\224\272wE\re\0\0008\0\20\37\360\37\4 \0\0\0\0\350\237\20\0\320\2370\0"..., 8192) = 8192
read(60, "\0\0\0\0tD{F\34S\0\0\220\3\324\6\360\37\4 \0\0\0\0t\2078\0\300\237`\0"..., 8192) = 8192
brk(0x2e07000) = 0x2e07000
_llseek(41, 32768, [32768], SEEK_SET) = 0
read(41, "\0\0\0\0\320B{F\23I\0\0\230\6p\f\360\37\4 \0\0\0\0p\214\30\0\344\237\30\0"..., 8192) = 8192
_llseek(42, 335872, [335872], SEEK_SET) = 0
read(42, "\0\0\0\0 \"|FB\177\5\0\340\0\320\7\0 \4 Z\362\1\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
openat(AT_FDCWD, "base/139580/139666", O_RDWR|O_LARGEFILE) = 61
read(61, "\0\0\0\0\244\326\212E\354\30\0\0<\0\360\37\360\37\4 \0\0\0\0b1\5\0\3\0\0\0"..., 8192) = 8192
_llseek(61, 24576, [24576], SEEK_SET) = 0
read(61, "\0\0\0\0$\326\212E\217\235\0\0\220\0`\31\360\37\4 \0\0\0\0\350\237\20\0\254\237x\0"..., 8192) = 8192
_llseek(61, 8192, [8192], SEEK_SET) = 0
read(61, "\0\0\0\0008\365XF\253\246\0\0\30\2\214\3\360\37\4 \0\0\0\0000\205x\0\270\237p\0"..., 8192) = 8192
read(43, "\0\0\0\0H>mFL\204\0\0D\7l\n\360\37\4 \0\0\0\0\344\237\30\0\330\237\30\0"..., 8192) = 8192
brk(0x2e29000) = 0x2e29000
_llseek(31, 40960, [40960], SEEK_SET) = 0
read(31, "\0\0\0\0\270\244.F}8\0\0\220\1\354\22\360\37\4 \0\0\0\0\320\237@\0\254\237H\0"..., 8192) = 8192
_llseek(16, 2605056, [2605056], SEEK_SET) = 0
read(16, "\0\0\0\0\304\347\220FX\37\5\0\10\1d\7\0 \4 \0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(33, "\0\0\0\0\230x\375<b\341\0\0\204\0\240\1\0 \4 \0\0\0\0\364\235\30\4<\235j\1"..., 8192) = 8192
_llseek(34, 98304, [98304], SEEK_SET) = 0
read(34, "\0\0\0\0<\214vF\266\210\0\0\204\1\260\r\360\37\4 \0\0\0\0\250\237\220\0t\237h\0"..., 8192) = 8192
_llseek(35, 253952, [253952], SEEK_SET) = 0
read(35, "\0\0\0\0\304\35\240F\367\6\5\0x\0|\0\0 \4 \347\360\1\0\0\0\0\0\3\0\1\0"..., 8192) = 8192
_llseek(35, 1187840, [1187840], SEEK_SET) = 0
read(35, "\0\0\0\0\314=\240F\2\203\5\0\224\0\324\0\0 \4 \352\344\1\0T\227\376\4\0\0\0\0"..., 8192) = 8192
_llseek(35, 1122304, [1122304], SEEK_SET) = 0
read(35, "\0\0\0\0\\\\\240Fm\252\5\0H\0\0\2\0 \4 \0\0\0\0\200\235\374\4\0\233\374\4"..., 8192) = 8192
_llseek(35, 450560, [450560], SEEK_SET) = 0
read(35, "\0\0\0\0\360e\240FO\212\5\0\204\0008\27\0 \4 \347\360\1\0\0\0\0\0\4\0\1\0"..., 8192) = 8192
_llseek(44, 73728, [73728], SEEK_SET) = 0
read(44, "\0\0\0\0\330\206d\1\35\252\4\0\310\0\4\1\0 \4 \0\0\0\0P\237^\1\240\236`\1"..., 8192) = 8192
_llseek(43, 32768, [32768], SEEK_SET) = 0
read(43, "\0\0\0\0\214\311\206E\"\331\0\0@\7x\n\360\37\4 \0\0\0\0x\212\30\0\344\237\30\0"..., 8192) = 8192
_llseek(44, 188416, [188416], SEEK_SET) = 0
read(44, "\0\0\0\0`'f\1/4\4\0\304\0\360\0\0 \4 \0\0\0\0L\237h\1\230\236h\1"..., 8192) = 8192
_llseek(23, 49152, [49152], SEEK_SET) = 0
read(23, "\0\0\0\0$\236.F*Q\0\0\374\3D\24\360\37\4 \0\0\0\0\344\237\30\0\330\237\30\0"..., 8192) = 8192
_llseek(24, 73728, [73728], SEEK_SET) = 0
read(24, "\0\0\0\0\244\34\203E\314\225\0\0\324\5\0\t\360\37\4 \0\0\0\0\0\211 \0\340\237 \0"..., 8192) = 8192
_llseek(16, 417792, [417792], SEEK_SET) = 0
read(16, "\0\0\0\0\234\5\210F\2474\5\0\30\1\34\4\0 \4 \0\0\0\0t\237\30\1\350\236\30\1"..., 8192) = 8192
openat(AT_FDCWD, "base/139580/139770", O_RDWR|O_LARGEFILE) = 62
read(62, "\0\0\0\0P_\277E\233!\0\0<\0\360\37\360\37\4 \0\0\0\0b1\5\0\3\0\0\0"..., 8192) = 8192
read(62, "\0\0\0\0\320^\277E/S\0\0\224\2|\30\360\37\4 \0\0\0\0\344\237\30\0\330\237\30\0"..., 8192) = 8192
openat(AT_FDCWD, "base/139580/2224", O_RDWR|O_LARGEFILE) = 63
read(63, "\0\0\0\0\270s\330<\311\265\1\0\34\3x\6\0 \4 -\250\1\0\2\0\1\0\264\237\222\0"..., 8192) = 8192
openat(AT_FDCWD, "base/139580/90709", O_RDWR|O_LARGEFILE) = 64
read(64, "\0\0\0\0Tf\240F\375\242\0\0\34\0\320\37\374\37\4 \0\0\0\0\320\237R\0\0\0\0\0"..., 8192) = 8192
_llseek(43, 16384, [16384], SEEK_SET) = 0
read(43, "\0\0\0\0|\254\206EW\310\0\0@\7x\n\360\37\4 \0\0\0\0x\212\30\0\344\237\30\0"..., 8192) = 8192
_llseek(44, 131072, [131072], SEEK_SET) = 0
read(44, "\0\0\0\0\344Fe\1\305\372\4\0\300\0\f\1\0 \4 \0\0\0\0D\237r\1\210\236r\1"..., 8192) = 8192
_llseek(43, 40960, [40960], SEEK_SET) = 0
read(43, "\0\0\0\0\234\346\206E\245\361\0\0@\7x\n\360\37\4 \0\0\0\0x\212\30\0\344\237\30\0"..., 8192) = 8192
_llseek(44, 147456, [147456], SEEK_SET) = 0
read(44, "\0\0\0\0\360\206e\1\r\331\4\0\274\0H\1\0 \4 \0\0\0\0P\237`\1\210\236\216\1"..., 8192) = 8192
gettimeofday({tv_sec=1541084179, tv_usec=208768}, NULL) = 0
gettimeofday({tv_sec=1541084179, tv_usec=208829}, NULL) = 0
openat(AT_FDCWD, "base/139580/111805_fsm", O_RDWR|O_LARGEFILE) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "base/139580/111805", O_RDWR|O_LARGEFILE) = 65
_llseek(65, 0, [8192], SEEK_END) = 0
_llseek(65, 0, [0], SEEK_SET) = 0
read(65, "\0\0\0\0hs\240FT.\0\0(\0L\23\0 \4 \0\0\0\0\224\234\330\6\374\231*\5"..., 8192) = 8192
read(45, "\0\0\0\0\274\324\246F^a\0\0,\0T\4\0 \4 \0\0\0\0,\231\244\r@\221\330\17"..., 8192) = 8192
openat(AT_FDCWD, "base/139580/139675", O_RDWR|O_LARGEFILE) = 66
read(66, "\0\0\0\0\274\264/F\4\214\0\0<\0\360\37\360\37\4 \0\0\0\0b1\5\0\3\0\0\0"..., 8192) = 8192
_llseek(66, 24576, [24576], SEEK_SET) = 0
read(66, "\0\0\0\0d^uF\2\250\0\0000\0\254\37\360\37\4 \0\0\0\0\350\237\20\0\304\237\30\0"..., 8192) = 8192
_llseek(66, 49152, [49152], SEEK_SET) = 0
read(66, "\0\0\0\0t\264/F\247\n\0\0\f\3\24\27\360\37\4 \0\0\0\0\344\237\30\0\330\237\30\0"..., 8192) = 8192
_llseek(35, 114688, [114688], SEEK_SET) = 0
read(35, "\0\0\0\0\204\241\240F\376\212\5\0\230\0\34\17\0 \4 \347\360\1\0\0\0\0\0d\2378\1"..., 8192) = 8192
_llseek(24, 303104, [303104], SEEK_SET) = 0
read(24, "\0\0\0\0\220\213\207F5\241\0\0\\\3\340\22\360\37\4 \0\0\0\0\340\222 \0\340\237 \0"..., 8192) = 8192
_llseek(16, 2834432, [2834432], SEEK_SET) = 0
read(16, "\0\0\0\0\370\226\221F\305\271\5\0\f\1\f\17\0 \4 \371\360\1\0t\237\30\1\350\236\30\1"..., 8192) = 8192
_llseek(33, 352256, [352256], SEEK_SET) = 0
read(33, "\0\0\0\0\320WKC\3462\0\0\200\0D\7\0 \4 \0\0\0\0\360\235\32\0048\235j\1"..., 8192) = 8192
_llseek(34, 65536, [65536], SEEK_SET) = 0
read(34, "\0\0\0\0h\266vFx\34\0\0\220\0\200\31\360\37\4 \0\0\0\0\250\237\220\0t\237h\0"..., 8192) = 8192
_llseek(35, 1064960, [1064960], SEEK_SET) = 0
read(35, "\0\0\0\0\304a\33F\16\340\5\0H\0\360\1\0 \4 \0\0\0\0|\235\4\5\370\232\4\5"..., 8192) = 8192
_llseek(35, 122880, [122880], SEEK_SET) = 0
read(35, "\0\0\0\0p\274/FnR\5\0x\0\220\26\0 \4 \347\360\1\0\5\0\1\0\0\0\0\0"..., 8192) = 8192
_llseek(35, 49152, [49152], SEEK_SET) = 0
read(35, "\0\0\0\0`IrF\236\337\1\0t\0$\33\0 \4 \347\360\1\0\0\0\0\0d\2378\1"..., 8192) = 8192
_llseek(35, 40960, [40960], SEEK_SET) = 0
read(35, "\0\0\0\0\234\304vF\267\306\1\0\310\0\364\7\0 \4 \347\360\1\0|\231\n\1\364\230\n\1"..., 8192) = 8192
_llseek(35, 73728, [73728], SEEK_SET) = 0
read(35, "\0\0\0\0004\215xF\277l\0\0\204\0\200\10\0 \4 \347\360\1\0\0\200\1\0\0\200\1\0"..., 8192) = 8192
_llseek(36, 106496, [106496], SEEK_SET) = 0
read(36, "\0\0\0\0|\n\224F\252V\0\0\\\0\334\34\360\37\4 \0\0\0\0\254\237\210\0|\237`\0"..., 8192) = 8192
_llseek(37, 376832, [376832], SEEK_SET) = 0
read(37, "\0\0\0\0\20\310vFmN\5\0\350\0\304\30\0 \4 \0\0\0\0\20\237\332\1\0\236 \2"..., 8192) = 8192
--- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_MAPERR, si_addr=0x10} ---
+++ killed by SIGSEGV +++

Karsten Hilbert
--
GPG 40BE 5B0E C98E 1713 AFA6 5BC0 3BEA AC80 7D4F C89B
On Thu, Nov 01, 2018 at 04:11:33PM +0100, Karsten Hilbert wrote:

>> Will attempt to get a stack trace ...

Eventually, the stack trace (attached).

Thanks for insights,
Karsten
--
GPG 40BE 5B0E C98E 1713 AFA6 5BC0 3BEA AC80 7D4F C89B
On 2018-Nov-01, Karsten Hilbert wrote:

> Program received signal SIGSEGV, Segmentation fault.
> heap_attisnull (tup=0x0, attnum=5, tupleDesc=0xb2990ef4) at ./build/../src/backend/access/common/heaptuple.c:403
> 403     ./build/../src/backend/access/common/heaptuple.c: No such file or directory.
> (gdb) thread apply all bt full
>
> Thread 1 (Thread 0xb4c429c0 (LWP 22367)):
> #0  heap_attisnull (tup=0x0, attnum=5, tupleDesc=0xb2990ef4) at ./build/../src/backend/access/common/heaptuple.c:403
>         __func__ = "heap_attisnull"
> #1  0x0087690d in ri_NullCheck (tupDesc=0xb2990ef4, tup=0x0, riinfo=0x2e1d548, rel_is_pk=true) at ./build/../src/backend/utils/adt/ri_triggers.c:2894
>         attnums = 0x2e1d5a4
>         i = 0
>         allnull = true
>         nonenull = true
> #2  0x00879bf7 in RI_FKey_cascade_del (fcinfo=0xbfbda9e4) at ./build/../src/backend/utils/adt/ri_triggers.c:917

Ah, now this is interesting. Can you please supply the definition of the table? I'm wondering if there is a partitioned table with an FK to this one.

I'm not quite seeing how come 'tup' is NULL there. Can you 'print trigdata' in frame 2?

In general terms, this bug report would have been more actionable if you had shown the definition of the tables involved right from the start.

--
Álvaro Herrera                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
On Thu, Nov 01, 2018 at 12:27:23PM -0300, Alvaro Herrera wrote:

> In general terms, this bug report would have been more actionable if you
> had shown the definition of the tables involved right from the start.

Sorry for that, will supply.

Karsten
--
GPG 40BE 5B0E C98E 1713 AFA6 5BC0 3BEA AC80 7D4F C89B
On Thu, Nov 01, 2018 at 12:27:23PM -0300, Alvaro Herrera wrote:

> Ah, now this is interesting. Can you please supply the definition of
> the table?

Attached.

> I'm wondering if there is a partitioned table with an FK to
> this one.

There is. Both ref.auto_hint and clin.suppressed_hint use inheritance (from audit.audit_fields). However, GNUmed does not use inheritance for explicit partitioning, but rather in a way similar to how classes and subclasses are used in OO languages.

> I'm not quite seeing how come 'tup' is NULL there. Can you
> 'print trigdata' in frame 2?

Sure, how ? :-)

(I can surely type "print trigdata" but does that already auto-select from "frame 2" ?)

Karsten
--
GPG 40BE 5B0E C98E 1713 AFA6 5BC0 3BEA AC80 7D4F C89B
Alvaro Herrera <alvherre@2ndquadrant.com> writes:
> Ah, now this is interesting. Can you please supply the definition of
> the table? I'm wondering if there is a partitioned table with an FK to
> this one. I'm not quite seeing how come 'tup' is NULL there.

Yeah, apparently we've passed a null OLD tuple to an RI_FKey_cascade_del trigger, which surely shouldn't happen. It'd be interesting to look at the set of triggers on this table. I don't entirely trust psql's \d to show us reality if there's something screwed up about the triggers, so in addition to \d output, could we see

	select * from pg_trigger where tgrelid = 'ref.auto_hint'::regclass;

			regards, tom lane
On Thu, Nov 01, 2018 at 12:27:23PM -0300, Alvaro Herrera wrote:

> > Program received signal SIGSEGV, Segmentation fault.
> > heap_attisnull (tup=0x0, attnum=5, tupleDesc=0xb2990ef4) at ./build/../src/backend/access/common/heaptuple.c:403
> > 403     ./build/../src/backend/access/common/heaptuple.c: No such file or directory.
> > (gdb) thread apply all bt full
> >
> > Thread 1 (Thread 0xb4c429c0 (LWP 22367)):
> > #0  heap_attisnull (tup=0x0, attnum=5, tupleDesc=0xb2990ef4) at ./build/../src/backend/access/common/heaptuple.c:403
> >         __func__ = "heap_attisnull"
> > #1  0x0087690d in ri_NullCheck (tupDesc=0xb2990ef4, tup=0x0, riinfo=0x2e1d548, rel_is_pk=true) at ./build/../src/backend/utils/adt/ri_triggers.c:2894
> >         attnums = 0x2e1d5a4
> >         i = 0
> >         allnull = true
> >         nonenull = true
> > #2  0x00879bf7 in RI_FKey_cascade_del (fcinfo=0xbfbda9e4) at ./build/../src/backend/utils/adt/ri_triggers.c:917
>
> Can you 'print trigdata' in frame 2?

Does this help at all ?

	(gdb) frame 2
	#2  0x00879bf7 in RI_FKey_cascade_del (fcinfo=0xbfbda9e4) at ./build/../src/backend/utils/adt/ri_triggers.c:917
	917     ./build/../src/backend/utils/adt/ri_triggers.c: No such file or directory.
	(gdb) print trigdata
	$1 = (TriggerData *) 0xbfbdaca4

It is from another run, however.

Karsten
--
GPG 40BE 5B0E C98E 1713 AFA6 5BC0 3BEA AC80 7D4F C89B
On Thu, Nov 01, 2018 at 11:43:56AM -0400, Tom Lane wrote:

> Alvaro Herrera <alvherre@2ndquadrant.com> writes:
> > Ah, now this is interesting. Can you please supply the definition of
> > the table? I'm wondering if there is a partitioned table with an FK to
> > this one. I'm not quite seeing how come 'tup' is NULL there.
>
> Yeah, apparently we've passed a null OLD tuple to an RI_FKey_cascade_del
> trigger, which surely shouldn't happen. It'd be interesting to look at
> the set of triggers on this table. I don't entirely trust psql's \d
> to show us reality if there's something screwed up about the triggers,
> so in addition to \d output, could we see
>
> select * from pg_trigger where tgrelid = 'ref.auto_hint'::regclass;

Output format is »wrapped«.

 tgrelid | tgname                        | tgfoid | tgtype | tgenabled | tgisinternal | tgconstrrelid | tgconstrindid | tgconstraint | tgdeferrable | tginitdeferred | tgnargs | tgattr | tgargs | tgqual | tgoldtable | tgnewtable
---------+-------------------------------+--------+--------+-----------+--------------+---------------+---------------+--------------+--------------+----------------+---------+--------+--------+--------+------------+------------
  111088 | RI_ConstraintTrigger_a_116970 |   1646 |      9 | O         | t            |        116789 |        111099 |       116969 | f            | f              |       0 |        | \x     |        |            |
  111088 | RI_ConstraintTrigger_a_116971 |   1649 |     17 | O         | t            |        116789 |        111099 |       116969 | f            | f              |       0 |        | \x     |        |            |
  111088 | zt_ins_auto_hint              | 141652 |      7 | O         | f            |             0 |             0 |            0 | f            | f              |       0 |        | \x     |        |            |
  111088 | zt_upd_auto_hint              | 141654 |     19 | O         | f            |             0 |             0 |            0 | f            | f              |       0 |        | \x     |        |            |
  111088 | zt_del_auto_hint              | 141656 |     11 | O         | f            |             0 |             0 |            0 | f            | f              |       0 |        | \x     |        |            |
(5 rows)

BTW, do you want me to attach text files or include them in the body (potential wrapping issues on display) ?

Thanks,
Karsten
--
GPG 40BE 5B0E C98E 1713 AFA6 5BC0 3BEA AC80 7D4F C89B
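(The two RI_ConstraintTrigger_a_* rows above share tgconstraint = 116969. To see which foreign key constraint - and which referencing table - those internal triggers implement, pg_constraint can be cross-checked; this query is a sketch, not something posted in the thread:)

	select conname,
	       conrelid::regclass  as referencing_table,
	       confrelid::regclass as referenced_table,
	       confdeltype
	from pg_constraint
	where oid = 116969;
	-- confdeltype = 'c' would confirm ON DELETE CASCADE, matching the
	-- RI_FKey_cascade_del frame in the backtrace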
Karsten Hilbert <Karsten.Hilbert@gmx.net> writes:
> On Thu, Nov 01, 2018 at 11:43:56AM -0400, Tom Lane wrote:
>> Yeah, apparently we've passed a null OLD tuple to an RI_FKey_cascade_del
>> trigger, which surely shouldn't happen. It'd be interesting to look at
>> the set of triggers on this table. I don't entirely trust psql's \d
>> to show us reality if there's something screwed up about the triggers,
>> so in addition to \d output, could we see
>> select * from pg_trigger where tgrelid = 'ref.auto_hint'::regclass;

> [ pretty normal-looking trigger entries ]

I was feeling baffled about this, but it suddenly occurs to me that maybe the bug fixed in 040a1df61/372102b81 explains this. If the particular tuple you're trying to delete predates the last ALTER TABLE ADD COLUMN operation on the table, then this bug would result in t_self getting set to "invalid", which'd cause AfterTriggerSaveEvent to save "ate_ctid1" as "invalid", which'd lead to a null tuple getting passed when the trigger eventually gets invoked.

			regards, tom lane
On Fri, Nov 02, 2018 at 11:56:58PM -0400, Tom Lane wrote:

> > On Thu, Nov 01, 2018 at 11:43:56AM -0400, Tom Lane wrote:
> >> Yeah, apparently we've passed a null OLD tuple to an RI_FKey_cascade_del
> >> trigger, which surely shouldn't happen. It'd be interesting to look at
> >> the set of triggers on this table. I don't entirely trust psql's \d
> >> to show us reality if there's something screwed up about the triggers,
> >> so in addition to \d output, could we see
> >> select * from pg_trigger where tgrelid = 'ref.auto_hint'::regclass;
>
> > [ pretty normal-looking trigger entries ]
>
> I was feeling baffled about this, but it suddenly occurs to me that maybe
> the bug fixed in 040a1df61/372102b81 explains this. If the particular
> tuple you're trying to delete predates the last ALTER TABLE ADD COLUMN
> operation on the table, then this bug would result in t_self getting
> set to "invalid", which'd cause AfterTriggerSaveEvent to save "ate_ctid1"
> as "invalid", which'd lead to a null tuple getting passed when the trigger
> eventually gets invoked.

Now, that is a theory I can follow up on -- all the bootstrapping SQL scripts are under version control, so I can pin down the exact sequence of events.

... goes off to do some checks ...

And sure enough, there's an ALTER TABLE ADD COLUMN related to that table:

	alter table ref.auto_hint
		add column recommendation_query text;

	-- (audit log table needs to get the new column, too)
	alter table audit.log_auto_hint
		add column recommendation_query text;

before the DELETE of the pre-existing tuple.

When running a rigged upgrade that stops right before those ALTER TABLEs and then doing the following:

	begin;

	-- instrument:
	insert into ref.auto_hint (title, query, hint, source, lang) values (
		'DELETE test',
		'select 1;',
		'insertion before ADD COLUMN',
		'testing',
		'en'
	);

	-- works:
	DELETE from ref.auto_hint where title = 'DELETE test';

	-- instrument, again:
	insert into ref.auto_hint (title, query, hint, source, lang) values (
		'DELETE test',
		'select 1;',
		'insertion before ADD COLUMN',
		'testing',
		'en'
	);

	alter table ref.auto_hint add column recommendation_query text;

	-- audit log table needs to get the new column, too
	alter table audit.log_auto_hint add column recommendation_query text;

	-- fails:
	DELETE from ref.auto_hint where title = 'DELETE test';

	rollback;

the expected segfault does indeed occur.

Conversely, moving the offending

	DELETE FROM ref.auto_hint WHERE title = 'Kontraindikation: ACE/Sartan <-> Schwangerschaft';

to right before the ALTER TABLEs makes the full upgrade run through without further problems.

Looking at 040a1df61/372102b81, it feels like it fits the bill.

So, I guess I can work around the issue by the above manoeuvre and report back once 040a1df61/372102b81 is released.

Anything else you'd want me to look into ?

Many thanks,
Karsten
--
GPG 40BE 5B0E C98E 1713 AFA6 5BC0 3BEA AC80 7D4F C89B
Karsten Hilbert <Karsten.Hilbert@gmx.net> writes:
> On Fri, Nov 02, 2018 at 11:56:58PM -0400, Tom Lane wrote:
>> I was feeling baffled about this, but it suddenly occurs to me that maybe
>> the bug fixed in 040a1df61/372102b81 explains this.

> So, I guess I can work around the issue by the above
> manoeuvre and report back once 040a1df61/372102b81 is
> released.
> Anything else you'd want me to look into ?

Don't suppose you'd want to build a server with 372102b81 applied and see if it works?

			regards, tom lane
On Sat, Nov 03, 2018 at 11:39:49AM -0400, Tom Lane wrote:

> Karsten Hilbert <Karsten.Hilbert@gmx.net> writes:
> > On Fri, Nov 02, 2018 at 11:56:58PM -0400, Tom Lane wrote:
> >> I was feeling baffled about this, but it suddenly occurs to me that maybe
> >> the bug fixed in 040a1df61/372102b81 explains this.
>
> > So, I guess I can work around the issue by the above
> > manoeuvre and report back once 040a1df61/372102b81 is
> > released.
> > Anything else you'd want me to look into ?
>
> Don't suppose you'd want to build a server with 372102b81 applied
> and see if it works?

Yeah, I feared that answer ;-)

However, I lack the skills and the machine(s) to do so ...

I will, at any rate, report back when the existing fix is released.

Karsten
--
GPG 40BE 5B0E C98E 1713 AFA6 5BC0 3BEA AC80 7D4F C89B
Hello,
we ran into exactly the same problem after upgrading to PostgreSQL 11 - the server crashed on a DELETE statement with a trigger. We also observed an AFTER DELETE trigger receiving NULL values in OLD. Now I see the problem seems to be solved (theoretically). Unfortunately, we are not able to build the server with the patch, so we cannot confirm that. However, when we just copied the database (within the same server), the same DELETE executed on the copy with no problems.
I would like to ask, however: could the same problem arise from an UPDATE statement (also on a table with an AFTER trigger), or would that be another bug (technical details below)?
As the bug causes our production servers to segfault several times a day, we'd like to solve the problem as soon as possible. Do I understand it correctly that if we dump and restore the database, the bug should not occur (until the next ALTER TABLE ADD COLUMN - which we'll avoid until the patch is released)?
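(For comparison, an UPDATE-flavoured variant of Karsten's DELETE repro from earlier in the thread would presumably look like the sketch below. The table and column names are placeholders, not our real schema, and whether this minimal form actually segfaults on an unpatched 11.0 is an assumption; our real table also carries AFTER UPDATE triggers and more indexes.)

	begin;

	-- stand-in table; names are made up for illustration
	create table t (id serial primary key, val text);
	insert into t (val) values ('pre-ALTER row');

	-- the row above now predates the ADD COLUMN:
	alter table t add column extra text;

	-- if UPDATE hits the same bug, this is the statement that
	-- would crash an unpatched server:
	update t set val = 'changed' where id = 1;

	rollback;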
With the update, we caught this (we also have the core dump and could provide it privately if useful):
#0 __memcmp_sse4_1 () at ../sysdeps/x86_64/multiarch/memcmp-sse4.S:793
No locals.
#1 0x00007f6a6b993296 in datumIsEqual (value1=7, value2=7, typByVal=<optimized out>, typLen=16) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/utils/adt/datum.c:249
size1 = 16
size2 = <optimized out>
s1 = 0x7 <error: Cannot access memory at address 0x7>
s2 = 0x7 <error: Cannot access memory at address 0x7>
res = <optimized out>
#2 0x00007f6a6b6b89de in ProjIndexIsUnchanged (newtup=0x7f6a6d504e80, oldtup=0x7ffc9c1c1270, relation=0x7f6a6b4c1738) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/access/heap/heapam.c:4539
att = <optimized out>
indexOid = <optimized out>
indexDesc = 0x7f6a6b4c5008
indexInfo = 0x7f6a6d505ca8
i = 0
indexoidlist = <optimized out>
econtext = 0x7f6a6d4c5080
new_isnull = {false, false, false, 156, 252, 127, false, false, 44, 29, 128, 107, 106, 127, false, false, 116, 54, 76, 107, 106, 127, false, false, 123, false, false, false, false, false, false, false}
slot = 0x7f6a6d507b78
equals = true
old_isnull = {false, false, false, 109, 106, 127, false, false, 165, 199, 80, 109, 106, 127, false, false, 160, 199, 80, 109, 106, 127, false, false, 168, 199, 80, 109, 106, 127, false, false}
new_values = {7, 4, 140095077240522, 16, 140722927572064, 140095077271456, 140095077272720, 140722927572079, 0, 0, 140722927572048, 140095054460096, 140095077060032, 140095077242520, 5, 4, 140722927572096, 140095046814123,
140722927572096, 140095046825162, 140095043410520, 140095077060880, 140722927572224, 140095046865098, 60129542543, 23274985272, 140095043410520, 140095077060032, 140095043409720, 140095077060304, 140095077237568,
140095043414504}
indexno = 1
l = 0x7f6a6d50d180
estate = 0x7f6a6d4c4e70
old_values = {7, 5, 140078657614882, 1, 140095077060032, 1024, 140095077271456, 0, 140722927571760, 140095049517261, 140722927571808, 140722927571776, 140722927571792, 140095046814329, 18, 140095077271464, 140095076986224,
140095077237568, 22, 100, 140722927571856, 140722927571920, 140095046831813, 140722927571856, 140095046842624, 0, 140095077265656, 3, 140095077271456, 140095077271792, 140095077271456, 140095077242520}
#3 heap_update (relation=relation@entry=0x7f6a6b4c1738, otid=otid@entry=0x7ffc9c1c1690, newtup=newtup@entry=0x7f6a6d504e80, cid=0, crosscheck=<optimized out>, wait=wait@entry=true, hufd=hufd@entry=0x7ffc9c1c15b0,
lockmode=lockmode@entry=0x7ffc9c1c15a4) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/access/heap/heapam.c:4230
result = <optimized out>
xid = 245028971
hot_attrs = 0x7f6a6d50d0a0
proj_idx_attrs = 0x7f6a6d50d0b8
key_attrs = 0x7f6a6d50d0d0
id_attrs = 0x7f6a6d50d0e8
interesting_attrs = 0x7f6a6d50d100
lp = <optimized out>
oldtup = {t_len = 81, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 9}, ip_posid = 71}, t_tableOid = 65471913, t_data = 0x7f669aa0b3f0}
heaptup = 0x7f6a6d504e80
old_key_tuple = 0x0
old_key_copied = false
page = 0x7f669aa0b180 "=="
block = <optimized out>
mxact_status = <optimized out>
buffer = <optimized out>
newbuf = 17858
vmbuffer = 0
vmbuffer_new = 0
need_toast = <optimized out>
newtupsize = <optimized out>
pagefree = <optimized out>
have_tuple_lock = false
iscombo = false
use_hot_update = false
hot_attrs_checked = <optimized out>
key_intact = <optimized out>
all_visible_cleared = false
all_visible_cleared_new = false
checked_lockers = <optimized out>
locker_remains = <optimized out>
xmax_new_tuple = <optimized out>
xmax_old_tuple = 245028971
infomask_old_tuple = 0
infomask2_old_tuple = 8192
infomask_new_tuple = 144
infomask2_new_tuple = 0
__func__ = "heap_update"
#4 0x00007f6a6b82f2c9 in ExecUpdate (mtstate=mtstate@entry=0x7f6a6d4da3f0, tupleid=0x7ffc9c1c1690, oldtuple=0x0, slot=0x7f6a6d504340, planSlot=planSlot@entry=0x7f6a6d5685e8, epqstate=epqstate@entry=0x7f6a6d4da4b0,
estate=estate@entry=0x7f6a6d4d8ed0, canSetTag=true) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/executor/nodeModifyTable.c:1209
lockmode = LockTupleNoKeyExclusive
partition_constraint_failed = false
tuple = 0x7f6a6d504e80
resultRelInfo = 0x7f6a6d4d9110
resultRelationDesc = 0x7f6a6b4c1738
result = <optimized out>
hufd = {ctid = {ip_blkid = {bi_hi = 5616, bi_lo = 39964}, ip_posid = 32764}, xmax = 1803594893, cmax = 32618}
recheckIndexes = 0x0
saved_tcs_map = 0x0
__func__ = "ExecUpdate"
#5 0x00007f6a6b82f978 in ExecModifyTable (pstate=0x7f6a6d4da3f0) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/executor/nodeModifyTable.c:2166
node = 0x7f6a6d4da3f0
proute = 0x0
estate = 0x7f6a6d4d8ed0
operation = CMD_UPDATE
saved_resultRelInfo = 0x0
resultRelInfo = 0x7f6a6d4d9110
subplanstate = 0x7f6a6d5679f8
junkfilter = 0x7f6a6d504218
slot = <optimized out>
tupleid = <optimized out>
tuple_ctid = {ip_blkid = {bi_hi = 0, bi_lo = 9}, ip_posid = 71}
oldtupdata = {t_len = 2619086544, t_self = {ip_blkid = {bi_hi = 32764, bi_lo = 0}, ip_posid = 9204}, t_tableOid = 32618, t_data = 0x7ffc9c1c16d0}
oldtuple = <optimized out>
__func__ = "ExecModifyTable"
#6 0x00007f6a6b80b25b in ExecProcNode (node=0x7f6a6d4da3f0) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/include/executor/executor.h:237
No locals.
#7 ExecutePlan (execute_once=<optimized out>, dest=0x7f6a6d5ed298, direction=<optimized out>, numberTuples=0, sendTuples=false, operation=CMD_UPDATE, use_parallel_mode=<optimized out>, planstate=0x7f6a6d4da3f0, estate=0x7f6a6d4d8ed0)
at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/executor/execMain.c:1723
slot = <optimized out>
current_tuple_count = 0
#8 standard_ExecutorRun (queryDesc=0x7f6a6d5f5b80, direction=<optimized out>, count=0, execute_once=<optimized out>) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/executor/execMain.c:364
estate = 0x7f6a6d4d8ed0
operation = CMD_UPDATE
dest = 0x7f6a6d5ed298
sendTuples = <optimized out>
__func__ = "standard_ExecutorRun"
#9 0x00007f6a6b95b41d in ProcessQuery (plan=<optimized out>,
sourceText=0x7f6a6d3b45a0 "UPDATE public.schedulecard SET ext_ident=NULL, finalreport_absence_cnt=NULL, period_day=4, period_num=1, rotates=false, rotationgroup_id=NULL, schedulecard_id=NULL, schedulecardblock_id=NULL, schedule"...,
params=0x0, queryEnv=0x0, dest=0x7f6a6d5ed298, completionTag=0x7ffc9c1c19d0 "") at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/tcop/pquery.c:161
queryDesc = 0x7f6a6d5f5b80
#10 0x00007f6a6b95b668 in PortalRunMulti (portal=portal@entry=0x7f6a6d47f230, isTopLevel=isTopLevel@entry=true, setHoldSnapshot=setHoldSnapshot@entry=false, dest=dest@entry=0x7f6a6d5ed298, altdest=altdest@entry=0x7f6a6d5ed298,
completionTag=completionTag@entry=0x7ffc9c1c19d0 "") at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/tcop/pquery.c:1286
pstmt = 0x7f6a6d5ed138
active_snapshot_set = true
stmtlist_item = 0x7f6a6d5ed248
#11 0x00007f6a6b95c2d5 in PortalRun (portal=portal@entry=0x7f6a6d47f230, count=count@entry=9223372036854775807, isTopLevel=isTopLevel@entry=true, run_once=run_once@entry=true, dest=dest@entry=0x7f6a6d5ed298,
altdest=altdest@entry=0x7f6a6d5ed298, completionTag=completionTag@entry=0x7ffc9c1c19d0 "") at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/tcop/pquery.c:799
save_exception_stack = 0x7ffc9c1c1c00
save_context_stack = 0x0
local_sigjmp_buf = {{__jmpbuf = {140722927574016, 7153736072036697610, 140095075869696, 140722927573456, 140095076692528, 140095075869744, 7153736071992657418, 7091693995611473418}, __mask_was_saved = 0, __saved_mask = {__val = {
0, 140723125634669, 0, 140095051070678, 64, 112, 140095075869696, 140095075861648, 140095078191720, 140722927573328, 140095049517496, 140722927573360, 2, 140095075869696, 2, 140722927573360}}}}
result = <optimized out>
nprocessed = <optimized out>
saveTopTransactionResourceOwner = 0x7f6a6d4182c8
saveTopTransactionContext = 0x7f6a6d490150
saveActivePortal = 0x0
saveResourceOwner = 0x7f6a6d4182c8
savePortalContext = 0x0
saveMemoryContext = 0x7f6a6d490150
__func__ = "PortalRun"
#12 0x00007f6a6b957f7f in exec_simple_query (
query_string=0x7f6a6d3b45a0 "UPDATE public.schedulecard SET ext_ident=NULL, finalreport_absence_cnt=NULL, period_day=4, period_num=1, rotates=false, rotationgroup_id=NULL, schedulecard_id=NULL, schedulecardblock_id=NULL, schedule"...) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/tcop/postgres.c:1122
parsetree = 0x7f6a6d3b6400
portal = 0x7f6a6d47f230
snapshot_set = <optimized out>
commandTag = <optimized out>
completionTag = "\000\032\034\234\374\177\000\000\341\356\247kj\177\000\000\000\000\000\000\000\000\000\000\060\316\367kj\177\000\000\340&\360kj\177\000\000\234U\357kj\177\000\000\240E;mj\177\000\000Q\000\000\000\000\000\000"
querytree_list = <optimized out>
plantree_list = <optimized out>
receiver = 0x7f6a6d5ed298
format = 0
dest = DestRemote
parsetree_list = 0x7f6a6d3b6450
parsetree_item = 0x7f6a6d3b6430
save_log_statement_stats = false
was_logged = false
use_implicit_block = false
msec_str = "\000\032\034\234\374\177\000\000\341\356\247kj\177\000\000\000\000\000\000\000\000\000\000\060\316\367kj\177\000"
__func__ = "exec_simple_query"
#13 0x00007f6a6b95930a in PostgresMain (argc=<optimized out>, argv=argv@entry=0x7f6a6d400220, dbname=0x7f6a6d4001b0 "mydb", username=<optimized out>)
at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/tcop/postgres.c:4159
query_string = 0x7f6a6d3b45a0 "UPDATE public.schedulecard SET ext_ident=NULL, finalreport_absence_cnt=NULL, period_day=4, period_num=1, rotates=false, rotationgroup_id=NULL, schedulecard_id=NULL, schedulecardblock_id=NULL, schedule"...
firstchar = 81
input_message = {
data = 0x7f6a6d3b45a0 "UPDATE public.schedulecard SET ext_ident=NULL, finalreport_absence_cnt=NULL, period_day=4, period_num=1, rotates=false, rotationgroup_id=NULL, schedulecard_id=NULL, schedulecardblock_id=NULL, schedule"..., len = 319, maxlen = 1024, cursor = 319}
local_sigjmp_buf = {{__jmpbuf = {140095054460088, 7153736071902479882, 140095054460096, 1, 0, 140095076133088, 7153736071933937162, 7091693993451144714}, __mask_was_saved = 1, __saved_mask = {__val = {0, 140095076133088,
140095004765576, 844424930131970, 206158430256, 140722927574360, 140722927574144, 293, 140095076133088, 0, 0, 140095075861648, 1024, 140722927574468, 0, 140722927574240}}}}
send_ready_for_query = false
disable_idle_in_transaction_timeout = false
__func__ = "PostgresMain"
#14 0x00007f6a6b67867d in BackendRun (port=0x7f6a6d3f68e0) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/postmaster/postmaster.c:4361
ac = 1
secs = 594476990
usecs = 933865
i = 1
av = 0x7f6a6d400220
maxac = <optimized out>
#15 BackendStartup (port=0x7f6a6d3f68e0) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/postmaster/postmaster.c:4033
bn = <optimized out>
pid = <optimized out>
#16 ServerLoop () at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/postmaster/postmaster.c:1706
rmask = {fds_bits = {8, 0 <repeats 15 times>}}
selres = <optimized out>
now = <optimized out>
readmask = {fds_bits = {200, 0 <repeats 15 times>}}
last_lockfile_recheck_time = 1541161751
last_touch_time = 1541161147
__func__ = "ServerLoop"
#17 0x00007f6a6b8e7644 in PostmasterMain (argc=5, argv=<optimized out>) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/postmaster/postmaster.c:1379
opt = <optimized out>
status = <optimized out>
userDoption = <optimized out>
listen_addr_saved = true
i = <optimized out>
output_config_variable = <optimized out>
__func__ = "PostmasterMain"
#18 0x00007f6a6b679616 in main (argc=5, argv=0x7f6a6d3af1f0) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/main/main.c:228
No locals.
Best regards,
Ondřej Bouda
---------- Original e-mail ----------
From: Tom Lane <tgl@sss.pgh.pa.us>
To: Karsten Hilbert <Karsten.Hilbert@gmx.net>
Date: 3. 11. 2018 4:57:19
Subject: Re: backend crash on DELETE, reproducible locally
Karsten Hilbert <Karsten.Hilbert@gmx.net> writes:
> On Thu, Nov 01, 2018 at 11:43:56AM -0400, Tom Lane wrote:
>> Yeah, apparently we've passed a null OLD tuple to an RI_FKey_cascade_del
>> trigger, which surely shouldn't happen. It'd be interesting to look at
>> the set of triggers on this table. I don't entirely trust psql's \d
>> to show us reality if there's something screwed up about the triggers,
>> so in addition to \d output, could we see
>> select * from pg_trigger where tgrelid = 'ref.auto_hint'::regclass;
> [ pretty normal-looking trigger entries ]
I was feeling baffled about this, but it suddenly occurs to me that maybe
the bug fixed in 040a1df61/372102b81 explains this. If the particular
tuple you're trying to delete predates the last ALTER TABLE ADD COLUMN
operation on the table, then this bug would result in t_self getting
set to "invalid", which'd cause AfterTriggerSaveEvent to save "ate_ctid1"
as "invalid", which'd lead to a null tuple getting passed when the trigger
eventually gets invoked.
regards, tom lane
<obouda@email.cz> writes:
> we ran into exactly the same problem after upgrading to PostgreSQL 11 - the
> server crashed on a DELETE statement with a trigger. We also observed an
> AFTER DELETE trigger receiving NULL values in OLD. Now I see the problem
> seems to be solved (theoretically). Unfortunately, we are not able to build
> the server with the patch, so we cannot confirm that. However, when we just
> copied the database (within the same server), the same DELETE executed on
> the copy with no problems.

Yeah, dump/reload would make the problem go away.

> I would like to ask, however: could the same problem arise from an UPDATE
> statement (also on a table with an AFTER trigger), or would that be another
> bug (technical details below)?

Same bug.

			regards, tom lane
On 11/03/2018 02:19 PM, obouda@email.cz wrote:
> Hello,
>
> we ran into exactly the same problem after upgrading to PostgreSQL 11 - the server crashed on a DELETE statement with a trigger. We also observed an AFTER DELETE trigger receiving NULL values in OLD. Now I see the problem seems to be solved (theoretically). Unfortunately, we are not able to build the server with the patch, so we cannot confirm that. However, when we just copied the database (within the same server), the same DELETE executed on the copy with no problems.
>
> I would like to ask, however: could the same problem arise from an UPDATE statement (also on a table with an AFTER trigger), or would that be another bug (technical details below)?
>
> As the bug causes our production servers to segfault several times a day, we'd like to solve the problem as soon as possible. Do I understand it correctly that if we dump and restore the database, the bug should not occur (until the next ALTER TABLE ADD COLUMN - which we'll avoid until the patch is released)?
You can dump that single table, truncate the table (presuming no FKs) and then restore it.
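(A sketch of that single-table route done purely in SQL, using the public.schedulecard table from the backtrace; the temp-table name is made up, and this presumes no foreign keys reference the table:)

	begin;

	-- keep concurrent writers out while the contents are rewritten
	lock table public.schedulecard in access exclusive mode;

	-- stash the rows, then rewrite them so that no tuple on disk
	-- predates the last ALTER TABLE ADD COLUMN
	create temp table schedulecard_copy as
		select * from public.schedulecard;

	truncate public.schedulecard;

	insert into public.schedulecard
		select * from schedulecard_copy;
	-- note: INSERT triggers on the table fire here unless disabled first
	-- (alter table public.schedulecard disable trigger user)

	commit;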
--
Angular momentum makes the world go 'round.
On 3. 11. 2018 at 20:38, Tom Lane wrote:
> Yeah, dump/reload would make the problem go away.
>
> Same bug.

So we dumped and restored all our databases. After that, the crash on DELETE never occurred (before, it happened several times a day). However, the crash on UPDATE still occurs on specific rows. We are quite certain no ALTER TABLE statement was executed on the table after the restore. There are two AFTER INSERT OR UPDATE constraint triggers and one BEFORE INSERT OR UPDATE trigger on the table, all of which are implemented in plpgsql.

Multiple physical servers, on separate databases with identical schema, crash on the same type of UPDATE query (differing only in the concrete values to be updated). The same code worked perfectly on 10.x.

See the attached backtrace below. Can we do something else to catch the bug? Or can we hope this bug is already fixed and will be released in the upcoming version?

Regards,
Ondřej Bouda

[New LWP 8307]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
Core was generated by `postgres: 11/main: usr mydb 127.0.0.1(56829) UPDATE '.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  __memcmp_sse4_1 () at ../sysdeps/x86_64/multiarch/memcmp-sse4.S:793
#0  __memcmp_sse4_1 () at ../sysdeps/x86_64/multiarch/memcmp-sse4.S:793
No locals.
#1  0x00007f6a6b993296 in datumIsEqual (value1=12, value2=12, typByVal=<optimized out>, typLen=16) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/utils/adt/datum.c:249
        size1 = 16
        size2 = <optimized out>
        s1 = 0xc <error: Cannot access memory at address 0xc>
        s2 = 0xc <error: Cannot access memory at address 0xc>
        res = <optimized out>
#2  0x00007f6a6b6b89de in ProjIndexIsUnchanged (newtup=0x7f6a6d5ccdd0, oldtup=0x7ffc9c1c1270, relation=0x7f6a6b4c3680) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/access/heap/heapam.c:4539
        att = <optimized out>
        indexOid = <optimized out>
        indexDesc = 0x7f6a6b4c7cf0
        indexInfo = 0x7f6a6d5cdbf8
        i = 0
        indexoidlist = <optimized out>
        econtext = 0x7f6a6d4c2c10
        new_isnull = {false, false, false, 156, 252, 127, false, false, 44, 29, 128, 107, 106, 127, false, false, 203, 89, 76, 107, 106, 127, false, false, 123, false, false, false, false, false, false, false}
        slot = 0x7f6a6d5cf8b8
        equals = true
        old_isnull = {false, false, false, 109, 106, 127, false, false, 69, 46, 80, 109, 106, 127, false, false, 64, 46, 80, 109, 106, 127, false, false, 72, 46, 80, 109, 106, 127, false, false}
        new_values = {12, 2, 140095078059546, 16, 140722927572064, 140095077232192, 140095077233456, 140722927572079, 0, 0, 140722927572048, 140095054460096, 140095077050704, 140095078061816, 5, 4, 140722927572096, 140095046814123, 140722927572096, 140095046825162, 140095043418528, 140095077051552, 140722927572224, 140095046865098, 60129542543, 23274993280, 140095043418528, 140095077050704, 140095043417728, 140095077050976, 140095078056288, 140095043422512}
        indexno = 2
        l = 0x7f6a6d503840
        estate = 0x7f6a6d4c2a00
        old_values = {12, 1, 140078631611970, 1, 140095077050704, 1024, 140095077232192, 0, 140722927571760, 140095049517261, 140722927571808, 140722927571776, 140722927571792, 140095046814329, 18, 140095077232200, 140095076976896, 140095078056288, 22, 100, 140722927571856, 140722927571920, 140095046831813, 140722927571856, 140095046842624, 0, 140095077226392, 3, 140095077232192, 140095077232528, 140095077232192, 140095078061816}
#3  heap_update (relation=relation@entry=0x7f6a6b4c3680, otid=otid@entry=0x7ffc9c1c1690, newtup=newtup@entry=0x7f6a6d5ccdd0, cid=0, crosscheck=<optimized out>, wait=wait@entry=true, hufd=hufd@entry=0x7ffc9c1c15b0, lockmode=lockmode@entry=0x7ffc9c1c15a4) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/access/heap/heapam.c:4230
        result = <optimized out>
        xid = 245762845
        hot_attrs = 0x7f6a6d503740
        proj_idx_attrs = 0x7f6a6d503758
        key_attrs = 0x7f6a6d503770
        id_attrs = 0x7f6a6d503788
        interesting_attrs = 0x7f6a6d5037a0
        lp = <optimized out>
        oldtup = {t_len = 85, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 45}, ip_posid = 10}, t_tableOid = 557732818, t_data = 0x7f669913ee10}
        heaptup = 0x7f6a6d5ccdd0
        old_key_tuple = 0x0
        old_key_copied = false
        page = 0x7f669913d180 "]="
        block = <optimized out>
        mxact_status = <optimized out>
        buffer = <optimized out>
        newbuf = 14683
        vmbuffer = 0
        vmbuffer_new = 0
        need_toast = <optimized out>
        newtupsize = <optimized out>
        pagefree = <optimized out>
        have_tuple_lock = false
        iscombo = false
        use_hot_update = false
        hot_attrs_checked = <optimized out>
        key_intact = <optimized out>
        all_visible_cleared = false
        all_visible_cleared_new = false
        checked_lockers = <optimized out>
        locker_remains = <optimized out>
        xmax_new_tuple = <optimized out>
        xmax_old_tuple = 245762845
        infomask_old_tuple = 0
        infomask2_old_tuple = 8192
        infomask_new_tuple = 144
        infomask2_new_tuple = 0
        __func__ = "heap_update"
#4  0x00007f6a6b82f2c9 in ExecUpdate (mtstate=mtstate@entry=0x7f6a6d4d81c0, tupleid=0x7ffc9c1c1690, oldtuple=0x0, slot=0x7f6a6d5cc160, planSlot=planSlot@entry=0x7f6a6d567578, epqstate=epqstate@entry=0x7f6a6d4d8280, estate=estate@entry=0x7f6a6d4d6a60, canSetTag=true) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/executor/nodeModifyTable.c:1209
        lockmode = LockTupleNoKeyExclusive
        partition_constraint_failed = false
        tuple = 0x7f6a6d5ccdd0
        resultRelInfo = 0x7f6a6d4d6ca0
        resultRelationDesc = 0x7f6a6b4c3680
        result = <optimized out>
        hufd = {ctid = {ip_blkid = {bi_hi = 5616, bi_lo = 39964}, ip_posid = 32764}, xmax = 1803594893, cmax = 32618}
        recheckIndexes = 0x0
        saved_tcs_map = 0x0
        __func__ = "ExecUpdate"
#5  0x00007f6a6b82f978 in ExecModifyTable (pstate=0x7f6a6d4d81c0) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/executor/nodeModifyTable.c:2166
        node = 0x7f6a6d4d81c0
        proute = 0x0
        estate = 0x7f6a6d4d6a60
        operation = CMD_UPDATE
        saved_resultRelInfo = 0x0
        resultRelInfo = 0x7f6a6d4d6ca0
        subplanstate = 0x7f6a6d566988
        junkfilter = 0x7f6a6d5cc038
        slot = <optimized out>
        tupleid = <optimized out>
        tuple_ctid = {ip_blkid = {bi_hi = 0, bi_lo = 45}, ip_posid = 10}
        oldtupdata = {t_len = 2619086544, t_self = {ip_blkid = {bi_hi = 32764, bi_lo = 0}, ip_posid = 9204}, t_tableOid = 32618, t_data = 0x7ffc9c1c16d0}
        oldtuple = <optimized out>
        __func__ = "ExecModifyTable"
#6  0x00007f6a6b80b25b in ExecProcNode (node=0x7f6a6d4d81c0) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/include/executor/executor.h:237
No locals.
#7  ExecutePlan (execute_once=<optimized out>, dest=0x7f6a6d5e9448, direction=<optimized out>, numberTuples=0, sendTuples=false, operation=CMD_UPDATE, use_parallel_mode=<optimized out>, planstate=0x7f6a6d4d81c0, estate=0x7f6a6d4d6a60) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/executor/execMain.c:1723
        slot = <optimized out>
        current_tuple_count = 0
#8  standard_ExecutorRun (queryDesc=0x7f6a6d566040, direction=<optimized out>, count=0, execute_once=<optimized out>) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/executor/execMain.c:364
        estate = 0x7f6a6d4d6a60
        operation = CMD_UPDATE
        dest = 0x7f6a6d5e9448
        sendTuples = <optimized out>
        __func__ = "standard_ExecutorRun"
#9  0x00007f6a6b95b41d in ProcessQuery (plan=<optimized out>, sourceText=0x7f6a6d3b45a0 "UPDATE public.schedulecard SET ext_ident=NULL, finalreport_absence_cnt=NULL, period_day=2, period_num=1, rotates=false, rotationgroup_id=NULL, schedulecard_id=NULL, schedulecardblock_id=NULL, schedule"..., params=0x0, queryEnv=0x0, dest=0x7f6a6d5e9448, completionTag=0x7ffc9c1c19d0 "") at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/tcop/pquery.c:161
        queryDesc = 0x7f6a6d566040
#10 0x00007f6a6b95b668 in PortalRunMulti (portal=portal@entry=0x7f6a6d479d00, isTopLevel=isTopLevel@entry=true, setHoldSnapshot=setHoldSnapshot@entry=false, dest=dest@entry=0x7f6a6d5e9448, altdest=altdest@entry=0x7f6a6d5e9448, completionTag=completionTag@entry=0x7ffc9c1c19d0 "") at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/tcop/pquery.c:1286
        pstmt = 0x7f6a6d5e92e8
        active_snapshot_set = true
        stmtlist_item = 0x7f6a6d5e93f8
#11 0x00007f6a6b95c2d5 in PortalRun (portal=portal@entry=0x7f6a6d479d00, count=count@entry=9223372036854775807, isTopLevel=isTopLevel@entry=true, run_once=run_once@entry=true, dest=dest@entry=0x7f6a6d5e9448, altdest=altdest@entry=0x7f6a6d5e9448, completionTag=completionTag@entry=0x7ffc9c1c19d0 "") at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/tcop/pquery.c:799
        save_exception_stack = 0x7ffc9c1c1c00
        save_context_stack = 0x0
        local_sigjmp_buf = {{__jmpbuf = {140722927574016, 7153736072036697610, 140095075869728, 140722927573456, 140095076670720, 140095075869776, 7153736071992657418, 7091693995611473418}, __mask_was_saved = 0, __saved_mask = {__val = {0, 140723125634669, 0, 140095051070678, 64, 112, 140095075869728, 140095075861648, 140095078175768, 140722927573328, 140095049517496, 140722927573360, 2, 140095075869728, 2, 140722927573360}}}}
        result = <optimized out>
        nprocessed = <optimized out>
        saveTopTransactionResourceOwner = 0x7f6a6d41b148
        saveTopTransactionContext = 0x7f6a6d48d8d0
        saveActivePortal = 0x0
        saveResourceOwner = 0x7f6a6d41b148
        savePortalContext = 0x0
        saveMemoryContext = 0x7f6a6d48d8d0
        __func__ = "PortalRun"
#12 0x00007f6a6b957f7f in exec_simple_query (query_string=0x7f6a6d3b45a0 "UPDATE public.schedulecard SET ext_ident=NULL, finalreport_absence_cnt=NULL, period_day=2, period_num=1, rotates=false, rotationgroup_id=NULL, schedulecard_id=NULL, schedulecardblock_id=NULL, schedule"...) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/tcop/postgres.c:1122
        parsetree = 0x7f6a6d3b6420
        portal = 0x7f6a6d479d00
        snapshot_set = <optimized out>
        commandTag = <optimized out>
        completionTag = "\000\032\034\234\374\177\000\000\341\356\247kj\177\000\000\000\000\000\000\000\000\000\000\060\316\367kj\177\000\000\340&\360kj\177\000\000\234U\357kj\177\000\000\240E;mj\177\000\000Q\000\000\000\000\000\000"
        querytree_list = <optimized out>
        plantree_list = <optimized out>
        receiver = 0x7f6a6d5e9448
        format = 0
        dest = DestRemote
        parsetree_list = 0x7f6a6d3b6470
        parsetree_item = 0x7f6a6d3b6450
        save_log_statement_stats = false
        was_logged = false
        use_implicit_block = false
        msec_str = "\000\032\034\234\374\177\000\000\341\356\247kj\177\000\000\000\000\000\000\000\000\000\000\060\316\367kj\177\000"
        __func__ = "exec_simple_query"
#13 0x00007f6a6b95930a in PostgresMain (argc=<optimized out>, argv=argv@entry=0x7f6a6d400400, dbname=0x7f6a6d4003e0 "mydb", username=<optimized out>) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/tcop/postgres.c:4159
        query_string = 0x7f6a6d3b45a0 "UPDATE public.schedulecard SET ext_ident=NULL, finalreport_absence_cnt=NULL, period_day=2, period_num=1, rotates=false, rotationgroup_id=NULL, schedulecard_id=NULL, schedulecardblock_id=NULL, schedule"...
        firstchar = 81
        input_message = {data = 0x7f6a6d3b45a0 "UPDATE public.schedulecard SET ext_ident=NULL, finalreport_absence_cnt=NULL, period_day=2, period_num=1, rotates=false, rotationgroup_id=NULL, schedulecard_id=NULL, schedulecardblock_id=NULL, schedule"..., len = 328, maxlen = 1024, cursor = 328}
        local_sigjmp_buf = {{__jmpbuf = {140095054460088, 7153736071902479882, 140095054460096, 1, 0, 140095076143920, 7153736071933937162, 7091693993451144714}, __mask_was_saved = 1, __saved_mask = {__val = {0, 140095076143920, 140095004765576, 844424930131970, 206158430256, 140722927574360, 140722927574144, 293, 140095076143920, 0, 0, 140095075861648, 1024, 140722927574468, 0, 140722927574240}}}}
        send_ready_for_query = false
        disable_idle_in_transaction_timeout = false
        __func__ = "PostgresMain"
#14 0x00007f6a6b67867d in BackendRun (port=0x7f6a6d3f9330) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/postmaster/postmaster.c:4361
        ac = 1
        secs = 594683937
        usecs = 521905
        i = 1
        av = 0x7f6a6d400400
        maxac = <optimized out>
#15 BackendStartup (port=0x7f6a6d3f9330) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/postmaster/postmaster.c:4033
        bn = <optimized out>
        pid = <optimized out>
#16 ServerLoop () at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/postmaster/postmaster.c:1706
        rmask = {fds_bits = {8, 0 <repeats 15 times>}}
        selres = <optimized out>
        now = <optimized out>
        readmask = {fds_bits = {200, 0 <repeats 15 times>}}
        last_lockfile_recheck_time = 1541368681
        last_touch_time = 1541366480
        __func__ = "ServerLoop"
#17 0x00007f6a6b8e7644 in PostmasterMain (argc=5, argv=<optimized out>) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/postmaster/postmaster.c:1379
        opt = <optimized out>
        status = <optimized out>
        userDoption = <optimized out>
        listen_addr_saved = true
        i = <optimized out>
        output_config_variable = <optimized out>
        __func__ = "PostmasterMain"
#18 0x00007f6a6b679616 in main (argc=5, argv=0x7f6a6d3af1f0) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/main/main.c:228
No locals.
For the record:

Regarding the backend crash when DELETEing tuples older than a recent ALTER TABLE ADD COLUMN:

> > > On Fri, Nov 02, 2018 at 11:56:58PM -0400, Tom Lane wrote:
> > >> I was feeling baffled about this, but it suddenly occurs to me that maybe
> > >> the bug fixed in 040a1df61/372102b81 explains this.
>
> > > So, I guess I can work around the issue by the above
> > > manoeuvre and report back once 040a1df61/372102b81 is
> > > released.
>
> I will, at any rate, report back when the existing fix is released.

Now that PG 11.1 is released on Debian:

	postgresql-11:
	  Installed: 11.1-1
	  Candidate: 11.1-1
	  Version table:
	 *** 11.1-1 500
	        500 http://httpredir.debian.org/debian unstable/main i386 Packages
	        100 /var/lib/dpkg/status
	     11.0-1+b1 990
	        990 http://httpredir.debian.org/debian buster/main i386 Packages

I can report that my issue is fixed by that version.

Thanks to all,
Karsten
--
GPG 40BE 5B0E C98E 1713 AFA6 5BC0 3BEA AC80 7D4F C89B
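(Once the upgraded packages are installed and the cluster restarted, a quick confirmation from any psql session that the fixed server is the one actually running:)

	-- both should report 11.1 (or later)
	select version();
	show server_version;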