Thread: [BUGS] Bus error in formatting.c NUM_numpart_to_char (9.4.12, 9.6.3, sparc)

[BUGS] Bus error in formatting.c NUM_numpart_to_char (9.4.12, 9.6.3, sparc)

From
"Tom Turelinckx"
Date:
Hi,

I was trying to compile 9.4.12 on sparc from the Debian source package in the pgdg repo, but it's failing multiple
regression tests. One numeric test crashes the backend:

LOG:  server process (PID 20659) was terminated by signal 10: Bus error
DETAIL:  Failed process was running: SELECT '' AS to_char_6,  to_char(val, 'FMS9999999999999999.999999999999999')
FROM num_data;

Reading symbols from /home/turelto/src/tmp/postgresql-9.4-9.4.12/build/src/backend/postgres...done.
[New LWP 20659]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/sparc-linux-gnu/libthread_db.so.1".
Core was generated by `postgres: turelto regression [local] SELECT                                   '.
Program terminated with signal 10, Bus error.
#0  NUM_numpart_to_char (id=3, Np=0xffa04a54)   at
/home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/backend/utils/adt/formatting.c:4419
4419                            if (Np->Num->lsign == NUM_LSIGN_PRE)
(gdb) l
4414                    (Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start ==
Np->num_curr))&& 
4415                    (IS_PREDEC_SPACE(Np) == FALSE || (Np->last_relevant && *Np->last_relevant == '.')))
4416            {
4417                    if (IS_LSIGN(Np->Num))
4418                    {
4419                            if (Np->Num->lsign == NUM_LSIGN_PRE)
4420                            {
4421                                    if (Np->sign == '-')
4422                                            strcpy(Np->inout_p, Np->L_negative_sign);
4423                                    else
(gdb) bt full
#0  NUM_numpart_to_char (id=3, Np=0xffa04a54)   at
/home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/backend/utils/adt/formatting.c:4419      end = <optimized out> 
#1  NUM_processor (node=<optimized out>, Num=<optimized out>, inout=<optimized out>, number=<optimized out>,
from_char_input_len=0,to_char_out_pre_spaces=<optimized out>, sign=43, is_to_char=1 '\001',    collid=<optimized out>)
at /home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/backend/utils/adt/formatting.c:4773       n = <optimized
out>      _Np = {is_to_char = 1 '\001', Num = 0xffa04b18, sign = 43, sign_wrote = 0, num_count = 30, num_in = 0,
 num_curr = 15, out_pre_spaces = 15, read_dec = 0, read_post = 0, read_pre = 0,          number = 0x61b440 "0.", '0'
<repeats15 times>, number_p = 0x61b440 "0.", '0' <repeats 15 times>,          inout = 0x61b1d4 "", inout_p = 0x61b1d4
"",last_relevant = 0x61b441 ".", '0' <repeats 15 times>,          L_negative_sign = 0x4c1438 "-", L_positive_sign =
0x3f5638"+", decimal = 0x4cd530 ".",          L_thousands_sep = 0x49dd78 ",", L_currency_symbol = 0x4edd08 " "} 
#2  0x002f2920 in numeric_to_char (fcinfo=0x618bd4)   at
/home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/backend/utils/adt/formatting.c:5181      len = <optimized out>
     value = <optimized out>       fmt = <optimized out>       Num = {pre = 16, post = 15, lsign = -1, flag = 98,
pre_lsign_num= 0, multi = 0, zero_start = 0, zero_end = 0,          need_locale = 1}       format = 0x540d30
result= 0x61b1d0       shouldFree = <optimized out>       out_pre_spaces = <optimized out>       sign = 43       numstr
=0x61b440 "0.", '0' <repeats 15 times>       orgnum = <optimized out>       p = <optimized out>       x = <optimized
out>
#3  0x001aa0f0 in ExecMakeFunctionResultNoSets (fcache=0x618b98, econtext=0x618ab8, isNull=0x619721 "",
isDone=<optimizedout>)   at /home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/backend/executor/execQual.c:2026
   arg = <optimized out>       result = <optimized out>       fcinfo = 0x618bd4       fcusage = {fs = 0x0,
save_f_total_time= {tv_sec = 0, tv_usec = 0}, save_total = {tv_sec = 0, tv_usec = 0},          f_start = {tv_sec = 0,
tv_usec= 0}}       i = <optimized out> 
#4  0x001ae258 in ExecTargetList (isDone=0xffa04c84, itemIsDone=0x6197d8, isnull=0x619720 "", values=0x619710,
econtext=0x618ab8,targetlist=0x6197b0)   at
/home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/backend/executor/execQual.c:5334      gstate = <optimized out>
     tle = <optimized out>       resind = <optimized out>       oldContext = 0x5f6640       tl = 0x6197c8
haveDoneSets= <optimized out> 
#5  ExecProject (projInfo=<optimized out>, isDone=0xffa04c84)   at
/home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/backend/executor/execQual.c:5549      slot = 0x619018
econtext= 0x618ab8       numSimpleVars = <optimized out> 
#6  0x001ae650 in ExecScan (node=0x618a30, accessMtd=0x1bfc80 <SeqNext>, recheckMtd=0x1bfc60 <SeqRecheck>)   at
/home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/backend/executor/execScan.c:207      slot = 0x619088
econtext= 0x618ab8       qual = 0x0       projInfo = 0x619730       isDone = ExprSingleResult       resultSlot =
<optimizedout> 
#7  0x001a7650 in ExecProcNode (node=0x618a30)   at
/home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/backend/executor/execProcnode.c:400      result = <optimized
out>      __func__ = "ExecProcNode" 
#8  0x001a443c in ExecutePlan (dest=0x5d11f8, direction=<optimized out>, numberTuples=0, sendTuples=<optimized out>,
operation=CMD_SELECT,planstate=0x618a30, estate=0x6189a8)   at
/home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/backend/executor/execMain.c:1490      slot = <optimized out>
   current_tuple_count = 0 
#9  standard_ExecutorRun (queryDesc=0x605f88, direction=<optimized out>, count=0)   at
/home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/backend/executor/execMain.c:319      estate = 0x6189a8
operation= CMD_SELECT       dest = 0x5d11f8       sendTuples = <optimized out>       oldcontext = 0x5f66f0 
#10 0x002ac734 in PortalRunSelect (portal=0x5fb768, forward=1 '\001', count=0, dest=0x5d11f8)   at
/home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/backend/tcop/pquery.c:942      queryDesc = 0x605f88
direction= ForwardScanDirection       nprocessed = <optimized out>       __func__ = "PortalRunSelect" 
#11 0x002adabc in PortalRun (portal=0x5fb768, count=2147483647, isTopLevel=1 '\001', dest=0x5d11f8, altdest=0x5d11f8,
completionTag=0xffa05040 "") at /home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/backend/tcop/pquery.c:786
save_exception_stack = 0xffa04f84       save_context_stack = 0x0       local_sigjmp_buf = {{__jmpbuf = {-285518332,
-285518572,292667848}, __mask_was_saved = 0, __saved_mask = {             __val = {4288696024, 5679664, 16777218, 0,
13,1076, 0, 0, 5611552, 5614592, 5459968, 5611520, 5397504,                120, 5681576, 1886539776, 6097184, 0, 0, 0,
0,6097168, 4288696024, 2792652, 90, 6247456, 2, 1, 0, 1,                1024, 0}}}}       result = <optimized out>
nprocessed = <optimized out>       saveTopTransactionResourceOwner = 0x56aa30 
---Type <return> to continue, or q <return> to quit---       saveTopTransactionContext = 0x56ab38
saveActivePortal= 0x0       saveResourceOwner = 0x56aa30       savePortalContext = 0x0       saveMemoryContext =
0x56ab38      __func__ = "PortalRun" 
#12 0x002a9ba4 in exec_simple_query (   query_string=0x5cf4d8 "SELECT '' AS to_char_6,  to_char(val, 'FMS", '9'
<repeats16 times>, ".", '9' <repeats 15 times>, "')    FROM num_data;") at
/home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/backend/tcop/postgres.c:1072      parsetree = 0x5d00e8
portal= 0x5fb768       snapshot_set = <optimized out>       commandTag = <optimized out>       completionTag =
"\000ELECT10\000\000E", '\000' <repeats 35 times>, "\024\000\000\000\024\377\377\377\377\000R\\\304\000R\\P"
plantree_list= 0x5d11e0       receiver = 0x5d11f8       format = 0       dest = DestRemote       parsetree_list =
0x5d0180      save_log_statement_stats = 0 '\000'       was_logged = 0 '\000'       msec_str =
"\000\240P\250\000\006\004\320\000\000\000\000\000X\324d0\n\000\034",'\000' <repeats 11 times>, "\002"
parsetree_item= 0x5d0170       isTopLevel = <optimized out> 
#13 PostgresMain (argc=<optimized out>, argv=<optimized out>, dbname=0x5694a0 "regression", username=<optimized out>)
at/home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/backend/tcop/postgres.c:4100       query_string = 0x5cf4d8
"SELECT'' AS to_char_6,  to_char(val, 'FMS", '9' <repeats 16 times>, ".", '9' <repeats 15 times>, "')    FROM
num_data;"      firstchar = 6095080       input_message = {         data = 0x5cf4d8 "SELECT '' AS to_char_6,
to_char(val,'FMS", '9' <repeats 16 times>, ".", '9' <repeats 15 times>, "')    FROM num_data;", len = 95, maxlen =
1024,cursor = 95}       local_sigjmp_buf = {{__jmpbuf = {-285518572, -285521052, 292654784}, __mask_was_saved = 1,
__saved_mask= {             __val = {0, 0, 0, 0, 1882663060, 1879054856, 1881810088, 0, 1, 0, 0, 0, 1881097612, 4, 0,
0,0, 0, 0, 0,                1886539776, 1886551248, 1886551248, 5821840, 0, 0, 0, 4288696392, 1885505112, 4288696368,
2364184,               0}}}}       send_ready_for_query = 0 '\000'       __func__ = "PostgresMain" 
#14 0x000606b0 in BackendRun (port=0x58d3d8)   at
/home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/backend/postmaster/postmaster.c:4301      ac = 1       secs =
551365461      usecs = 297758       i = 1       av = 0x569610       maxac = <optimized out> 
#15 BackendStartup (port=0x58d3d8)   at
/home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/backend/postmaster/postmaster.c:3964      bn = 0x10c3
pid= 0 
#16 ServerLoop () at /home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/backend/postmaster/postmaster.c:1694
port = 0x58d3d8       rmask = {fds_bits = {16, 0 <repeats 31 times>}}       selres = <optimized out>       now =
<optimizedout>       readmask = {fds_bits = {16, 0 <repeats 31 times>}}       nSockets = 5
last_lockfile_recheck_time= 1498050259       last_touch_time = 1498050259       __func__ = "ServerLoop" 
#17 0x0024d334 in PostmasterMain (argc=8, argv=<optimized out>)   at
/home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/backend/postmaster/postmaster.c:1302      opt = <optimized
out>      status = <optimized out>       userDoption = <optimized out>       listen_addr_saved = <optimized out>
i= <optimized out>       output_config_variable = <optimized out>       __func__ = "PostmasterMain" 
#18 0x000616f0 in main (argc=8, argv=0x568b88)   at
/home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/backend/main/main.c:233
No locals.


It seems this issue was introduced in 9.4.9: 9.4.9, 9.4.10, 9.4.11 and 9.4.12 all crash at the same test at the same
line in formatting.c, but 9.4.8 builds successfully and passes all tests. It also seems that both NUM_numpart_to_char
and the failing test have not been touched for, eh, decades, so the root cause must be somewhere else.

The same issue is present in 9.6.3:

LOG:  server process (PID 10632) was terminated by signal 10: Bus error
DETAIL:  Failed process was running: SELECT '' AS to_char_6,  to_char(val, 'FMS9999999999999999.999999999999999')
FROM num_data;

Reading symbols from /home/turelto/src/tmp/postgresql-9.6-9.6.3/build/src/backend/postgres...done.
[New LWP 10632]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/sparc-linux-gnu/libthread_db.so.1".
Core was generated by `postgres: turelto regression [local] SELECT                                   '.
Program terminated with signal 10, Bus error.
#0  NUM_numpart_to_char (id=3, Np=0xff924b5c)   at
/home/turelto/src/tmp/postgresql-9.6-9.6.3/build/../src/backend/utils/adt/formatting.c:4434
4434                            if (Np->Num->lsign == NUM_LSIGN_PRE)
(gdb) l
4429                    (Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start ==
Np->num_curr))&& 
4430                    (IS_PREDEC_SPACE(Np) == FALSE || (Np->last_relevant && *Np->last_relevant == '.')))
4431            {
4432                    if (IS_LSIGN(Np->Num))
4433                    {
4434                            if (Np->Num->lsign == NUM_LSIGN_PRE)
4435                            {
4436                                    if (Np->sign == '-')
4437                                            strcpy(Np->inout_p, Np->L_negative_sign);
4438                                    else
(gdb) bt full
#0  NUM_numpart_to_char (id=3, Np=0xff924b5c)   at
/home/turelto/src/tmp/postgresql-9.6-9.6.3/build/../src/backend/utils/adt/formatting.c:4434      end = <optimized out> 
#1  NUM_processor (node=<optimized out>, Num=<optimized out>, inout=<optimized out>, number=<optimized out>,
from_char_input_len=0,to_char_out_pre_spaces=<optimized out>, sign=43, is_to_char=1 '\001',    collid=<optimized out>)
at /home/turelto/src/tmp/postgresql-9.6-9.6.3/build/../src/backend/utils/adt/formatting.c:4788       n = <optimized
out>      _Np = {is_to_char = 1 '\001', Num = 0xff924c20, sign = 43, sign_wrote = 0, num_count = 30, num_in = 0,
 num_curr = 15, out_pre_spaces = 15, read_dec = 0, read_post = 0, read_pre = 0,          number = 0x6bdac0 "0.", '0'
<repeats15 times>, number_p = 0x6bdac0 "0.", '0' <repeats 15 times>,          inout = 0x6bd854 "", inout_p = 0x6bd854
"",last_relevant = 0x6bdac1 ".", '0' <repeats 15 times>,          L_negative_sign = 0x555908 "-", L_positive_sign =
0x4789d8"+", decimal = 0x565d10 ".",          L_thousands_sep = 0x52ed18 ",", L_currency_symbol = 0x588920 " "} 
#2  0x00354920 in numeric_to_char (fcinfo=0x6bbae4)   at
/home/turelto/src/tmp/postgresql-9.6-9.6.3/build/../src/backend/utils/adt/formatting.c:5213      len = <optimized out>
    value = <optimized out>       fmt = <optimized out>       Num = {pre = 16, post = 15, lsign = -1, flag = 98,
pre_lsign_num= 0, multi = 0, zero_start = 0, zero_end = 0,          need_locale = 1}       format = 0x5dc09c
result= 0x6bd850       shouldFree = <optimized out>       out_pre_spaces = <optimized out>       sign = 43       numstr
=0x6bdac0 "0.", '0' <repeats 15 times>       orgnum = <optimized out>       p = <optimized out>       x = <optimized
out>
#3  0x001e16b0 in ExecMakeFunctionResultNoSets (fcache=0x6bbaa8, econtext=0x6bb9c8, isNull=0x6bc229 "",
isDone=<optimizedout>) at /home/turelto/src/tmp/postgresql-9.6-9.6.3/build/../src/backend/executor/execQual.c:2041
arg = <optimized out>       result = <optimized out>       fcinfo = 0x6bbae4       fcusage = {fs = 0x0,
save_f_total_time= {tv_sec = 1, tv_usec = 0}, save_total = {tv_sec = 0, tv_usec = 0},          f_start = {tv_sec = 0,
tv_usec= 0}}       i = <optimized out> 
#4  0x001e5840 in ExecTargetList (isDone=0xff924d8c, itemIsDone=0x6bc2e0, isnull=0x6bc228 "", values=0x6bc218,
econtext=0x6bb9c8,tupdesc=<optimized out>, targetlist=0x6bc2b8)   at
/home/turelto/src/tmp/postgresql-9.6-9.6.3/build/../src/backend/executor/execQual.c:5423      gstate = <optimized out>
    tle = <optimized out>       resind = <optimized out>       att = 0x6bc02c       oldContext = 0x655430       tl =
0x6bc2d0      haveDoneSets = <optimized out> 
#5  ExecProject (projInfo=<optimized out>, isDone=0xff924d8c)   at
/home/turelto/src/tmp/postgresql-9.6-9.6.3/build/../src/backend/executor/execQual.c:5647      slot = 0x6bbf28
econtext= 0x6bb9c8       numSimpleVars = <optimized out> 
#6  0x001e5c84 in ExecScan (node=0x6bb940, accessMtd=0x1faec0 <SeqNext>, recheckMtd=0x1faea0 <SeqRecheck>)   at
/home/turelto/src/tmp/postgresql-9.6-9.6.3/build/../src/backend/executor/execScan.c:220      slot = 0x6bbf98
econtext= 0x6bb9c8       qual = 0x0       projInfo = 0x6bc238       isDone = ExprSingleResult       resultSlot =
<optimizedout> 
#7  0x001de620 in ExecProcNode (node=0x6bb940)   at
/home/turelto/src/tmp/postgresql-9.6-9.6.3/build/../src/backend/executor/execProcnode.c:419      result = <optimized
out>      __func__ = "ExecProcNode" 
#8  0x001da304 in ExecutePlan (dest=0x69c620, direction=<optimized out>, numberTuples=0, sendTuples=<optimized out>,
operation=CMD_SELECT,use_parallel_mode=<optimized out>, planstate=0x6bb940, estate=0x6bb838)   at
/home/turelto/src/tmp/postgresql-9.6-9.6.3/build/../src/backend/executor/execMain.c:1569      slot = <optimized out>
  current_tuple_count = 0 
#9  standard_ExecutorRun (queryDesc=0x6a61f8, direction=<optimized out>, count=<optimized out>)   at
/home/turelto/src/tmp/postgresql-9.6-9.6.3/build/../src/backend/executor/execMain.c:338      estate = 0x6bb838
operation= CMD_SELECT       dest = 0x69c620       sendTuples = <optimized out>       oldcontext = 0x6549b0 
#10 0x0030606c in PortalRunSelect (portal=0x6233a0, forward=<optimized out>, count=0, dest=0x69c620)   at
/home/turelto/src/tmp/postgresql-9.6-9.6.3/build/../src/backend/tcop/pquery.c:948      queryDesc = 0x6a61f8
direction= <optimized out>       nprocessed = <optimized out>       __func__ = "PortalRunSelect" 
#11 0x0030747c in PortalRun (portal=0x6233a0, count=2147483647, isTopLevel=1 '\001', dest=0x69c620, altdest=0x69c620,
completionTag=0xff925160 "") at /home/turelto/src/tmp/postgresql-9.6-9.6.3/build/../src/backend/tcop/pquery.c:789
save_exception_stack= 0xff9250a4       save_context_stack = 0x0       local_sigjmp_buf = {{__jmpbuf = {-488613709,
-488613485,490890671}, __mask_was_saved = 0, __saved_mask = {             __val = {16797688, 6621224, 2, 0, 6165432,
931,0, 0, 6165432, 6168576, 6094848, 6165504, 6053888, 120,                6320304, 1886343168, 6812424, 256, 0, 0, 0,
6812408,4287778808, 3159308, 97, 1130710192, 2, 1, 0, 1,                1024, 0}}}}       result = <optimized out>
nprocessed = <optimized out>       saveTopTransactionResourceOwner = 0x650828       saveTopTransactionContext =
0x655298      saveActivePortal = 0x0       saveResourceOwner = 0x650828       savePortalContext = 0x0
saveMemoryContext= 0x655298       __func__ = "PortalRun" 
#12 0x003033e4 in exec_simple_query (   query_string=0x67dec0 "SELECT '' AS to_char_6,  to_char(val, 'FMS", '9'
<repeats16 times>, ".", '9' <repeats 15 times>, "')    FROM num_data;") at
/home/turelto/src/tmp/postgresql-9.6-9.6.3/build/../src/backend/tcop/postgres.c:1094      parsetree = 0x67ead0
portal= 0x6233a0       snapshot_set = <optimized out>       commandTag = <optimized out>       completionTag =
"\000ELECT10\000\000E", '\000' <repeats 35 times>, "\024\000\000\000\024\377\377\377\377\000]\000\364\000\\\377",
<incompletesequence \364>       plantree_list = 0x69c608       receiver = 0x69c620       format = 0       dest =
DestRemote      parsetree_list = 0x67eb68       save_log_statement_stats = 0 '\000'       was_logged = 0 '\000'
msec_str= "\000\222Q\310\000\006\324\344\000bs\244\000bt$\000\000\000\034\000\000\000 \000bG0\000\000\000"
parsetree_item= 0x67eb58       isTopLevel = <optimized out> 
#13 PostgresMain (argc=<optimized out>, argv=<optimized out>, dbname=0x606c70 "regression", username=<optimized out>)
at/home/turelto/src/tmp/postgresql-9.6-9.6.3/build/../src/backend/tcop/postgres.c:4076       query_string = 0x67dec0
"SELECT'' AS to_char_6,  to_char(val, 'FMS", '9' <repeats 16 times>, ".", '9' <repeats 15 times>, "')    FROM
num_data;"      firstchar = 6810320       input_message = {         data = 0x67dec0 "SELECT '' AS to_char_6,
to_char(val,'FMS", '9' <repeats 16 times>, ".", '9' <repeats 15 times>, "')    FROM num_data;", len = 95, maxlen =
1024,cursor = 95}       local_sigjmp_buf = {{__jmpbuf = {-488613485, -488618077, 490879835}, __mask_was_saved = 1,
__saved_mask= {             __val = {0, 0, 0, 0, 1882908820, 4287779072, 1879209388, 0, 0, 0, 1882908820, 1879054832,
1882055848,4,                0, 0, 0, 0, 4287779152, 0, 0, 0, 0, 0, 0, 0, 0, 1498055587, 0, 0, 4294967295, 6052864}}}}
    send_ready_for_query = 0 '\000'       disable_idle_in_transaction_timeout = 0 '\000'       __func__ =
"PostgresMain"
#14 0x0006d6c8 in BackendRun (port=0x627398)   at
/home/turelto/src/tmp/postgresql-9.6-9.6.3/build/../src/backend/postmaster/postmaster.c:4285      ac = 1       secs =
551370787      usecs = 396920       i = 1       av = 0x62da48       maxac = <optimized out> 
#15 BackendStartup (port=0x627398)   at
/home/turelto/src/tmp/postgresql-9.6-9.6.3/build/../src/backend/postmaster/postmaster.c:3959      bn = 0x10b3       pid
=0 
#16 ServerLoop () at /home/turelto/src/tmp/postgresql-9.6-9.6.3/build/../src/backend/postmaster/postmaster.c:1715
port= 0x627398       rmask = {fds_bits = {16, 0 <repeats 31 times>}}       selres = <optimized out>       now =
<optimizedout>       readmask = {fds_bits = {16, 0 <repeats 31 times>}}       nSockets = 5
last_lockfile_recheck_time= 1498055585       last_touch_time = 1498055585       __func__ = "ServerLoop" 
#17 0x0029ba3c in PostmasterMain (argc=8, argv=<optimized out>)   at
/home/turelto/src/tmp/postgresql-9.6-9.6.3/build/../src/backend/postmaster/postmaster.c:1323      opt = <optimized out>
     status = <optimized out>       userDoption = <optimized out>       listen_addr_saved = <optimized out>       i =
<optimizedout>       output_config_variable = <optimized out>       __func__ = "PostmasterMain" 
#18 0x0006e6d8 in main (argc=8, argv=0x604af8)   at
/home/turelto/src/tmp/postgresql-9.6-9.6.3/build/../src/backend/main/main.c:228
No locals.


A potentially similar issue was introduced in 9.4.7 and resolved in 9.4.8:

https://www.postgresql.org/message-id/20160413094117.GC21485@msg.credativ.de

I can test patches or provide more information.

Best regards,
Tom Turelinckx




--
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs

Re: [BUGS] Bus error in formatting.c NUM_numpart_to_char (9.4.12,9.6.3, sparc)

From
Alvaro Herrera
Date:
Tom Turelinckx wrote:

> It seems this issue was introduced in 9.4.9: 9.4.9, 9.4.10, 9.4.11 and
> 9.4.12 all crash at the same test at the same line in formatting.c,
> but 9.4.8 builds successfully and passes all tests. It also seems that
> both NUM_numpart_to_char and the failing test have not been touched
> for, eh, decades, so the root cause must be somewhere else.

You're probably misreading the git log, because this code was touched
just before 9.4.9.  See commit
https://git.postgresql.org/pg/commitdiff/20f870fd7cab8446c208a4a9cfa5ec2a441ef69c

-- 
Álvaro Herrera                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs

"Tom Turelinckx" <tom@turelinckx.be> writes:
> I was trying to compile 9.4.12 on sparc from the debian source package
> in the pgdg repo, but it's failing multiple regression tests.

Hm.  What else fails besides the crash you're showing?  It's not very
easy to deduce what's wrong there, but maybe some other symptom would
be more transparent.

> It seems this issue was introduced in 9.4.9: 9.4.9, 9.4.10, 9.4.11 and 9.4.12 all crash at the same test at the same
> line in formatting.c, but 9.4.8 builds successfully and passes all tests. It also seems that both NUM_numpart_to_char
> and the failing test have not been touched for, eh, decades, so the root cause must be somewhere else.

Alvaro's right that there was a change in 9.4.9 in formatting.c, but
that seems unrelated.  And if you're seeing issues elsewhere, a more
global root cause seems what to postulate.

> A potentially similar issue was introduced in 9.4.7 and resolved in 9.4.8:
> https://www.postgresql.org/message-id/20160413094117.GC21485@msg.credativ.de

We never did get a clear explanation of why that crashed on Sparc.
I hypothesized over-aggressive compiler assumptions about alignment,
but there was no convincing evidence for that.  It might be worth taking
a look at the assembly code immediately around the crash point, especially
if you could also get the corresponding code from 9.4.8.
        regards, tom lane


--
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs

Re: [BUGS] Bus error in formatting.c NUM_numpart_to_char (9.4.12, 9.6.3, sparc)

From
"Tom Turelinckx"
Date:
Alvaro Herrera wrote:

> You're probably misreading the git log, because this code was touched
> just before 9.4.9.  See commit
> https://git.postgresql.org/pg/commitdiff/20f870fd7cab8446c208a4a9cfa5ec2a441ef69c

Thanks for the pointer! When I reverse patch that commit against 9.4.12 it builds successfully and passes all tests.

Against the original 9.4.12, only the numeric and sanity check tests fail. Depending on the run, various other tests
appear to fail, but those failures are caused by the numeric test crashing the backend and the tests being run in
parallel:

WARNING:  terminating connection because of crash of another server process
DETAIL:  The postmaster has commanded this server process to roll back the current transaction and exit, because
another server process exited abnormally and possibly corrupted shared memory.

I've verified that 10~beta1 also fails the numeric test:

Reading symbols from /home/turelto/src/original/postgresql-10-10~beta1/build/src/backend/postgres...done.
[New LWP 20199]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/sparc-linux-gnu/libthread_db.so.1".
Core was generated by `postgres: turelto regression [local] SELECT                                   '.
Program terminated with signal 10, Bus error.
#0  NUM_numpart_to_char (id=3, Np=0xffd22aa4)   at
/home/turelto/src/original/postgresql-10-10~beta1/build/../src/backend/utils/adt/formatting.c:4471
4471                            if (Np->Num->lsign == NUM_LSIGN_PRE)
(gdb) l
4466                    (Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start ==
Np->num_curr))&& 
4467                    (IS_PREDEC_SPACE(Np) == FALSE || (Np->last_relevant && *Np->last_relevant == '.')))
4468            {
4469                    if (IS_LSIGN(Np->Num))
4470                    {
4471                            if (Np->Num->lsign == NUM_LSIGN_PRE)
4472                            {
4473                                    if (Np->sign == '-')
4474                                            strcpy(Np->inout_p, Np->L_negative_sign);
4475                                    else

When I reverse patch the above commit against 10~beta1 it also builds successfully and passes all tests.

When I reverse patch the above commit against 9.6.3 it passes the numeric test, but (still) fails the object_address
test:

******** build/src/test/regress/regression.diffs ********
*** /home/turelto/src/tmp/postgresql-9.6-9.6.3/build/../src/test/regress/expected/object_address.out    2017-05-08
23:15:12.000000000+0200 
--- /home/turelto/src/tmp/postgresql-9.6-9.6.3/build/src/test/regress/results/object_address.out        2017-06-22
11:11:23.000000000+0200 
***************
*** 263,273 **** WARNING:  error for policy,{eins,zwei,drei},{}: schema "eins" does not exist WARNING:  error for
policy,{eins,zwei,drei},{integer}: schema "eins" does not exist WARNING:  error for user mapping,{eins},{}: argument
list length must be exactly 1
! WARNING:  error for user mapping,{eins},{integer}: user mapping for user "eins" on server "integer" does not exist
WARNING:  error for user mapping,{addr_nsp,zwei},{}: argument list length must be exactly 1
! WARNING:  error for user mapping,{addr_nsp,zwei},{integer}: user mapping for user "addr_nsp" on server "integer" does
not exist WARNING:  error for user mapping,{eins,zwei,drei},{}: argument list length must be exactly 1
! WARNING:  error for user mapping,{eins,zwei,drei},{integer}: user mapping for user "eins" on server "integer" does
not exist WARNING:  error for default acl,{eins},{}: argument list length must be exactly 1 WARNING:  error for default
acl,{eins},{integer}: unrecognized default ACL object type i WARNING:  error for default acl,{addr_nsp,zwei},{}:
argument list length must be exactly 1
--- 263,273 ---- WARNING:  error for policy,{eins,zwei,drei},{}: schema "eins" does not exist WARNING:  error for
policy,{eins,zwei,drei},{integer}: schema "eins" does not exist WARNING:  error for user mapping,{eins},{}: argument
list length must be exactly 1
! WARNING:  error for user mapping,{eins},{integer}: user mapping for user "(null)" on server "integer" does not exist
WARNING:  error for user mapping,{addr_nsp,zwei},{}: argument list length must be exactly 1
! WARNING:  error for user mapping,{addr_nsp,zwei},{integer}: user mapping for user "(null)" on server "integer" does
not exist WARNING:  error for user mapping,{eins,zwei,drei},{}: argument list length must be exactly 1
! WARNING:  error for user mapping,{eins,zwei,drei},{integer}: user mapping for user "(null)" on server "integer" does
not exist WARNING:  error for default acl,{eins},{}: argument list length must be exactly 1 WARNING:  error for default
acl,{eins},{integer}: unrecognized default ACL object type i WARNING:  error for default acl,{addr_nsp,zwei},{}:
argument list length must be exactly 1

======================================================================

The failing user mapping-related tests were introduced in commit 890192e (which is not in 9.4), and may have been
resolved by commit 8b6d6cf (which is in 10~beta1).

Best regards,
Tom Turelinckx




--
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs

Re: [BUGS] Bus error in formatting.c NUM_numpart_to_char (9.4.12, 9.6.3, sparc)

From
"Tom Turelinckx"
Date:
Tom Lane wrote:

> > A potentially similar issue was introduced in 9.4.7 and resolved in 9.4.8:
> > https://www.postgresql.org/message-id/20160413094117.GC21485@msg.credativ.de
>
> We never did get a clear explanation of why that crashed on Sparc.
> I hypothesized over-aggressive compiler assumptions about alignment, but there was no convincing evidence for that.
> It might be worth taking a look at the assembly code immediately around the crash point, especially if you could also
> get the corresponding code from 9.4.8.

I've already built 9.4.8, passing all tests. I can try to build 9.4.7 to reproduce that crash. I can also try to build
9.4.7 with just the relevant commit (0045691) applied to resolve that crash.

But I need more information / pointers to documentation on how to find and provide the information you need, as I have
no experience with looking at assembly code.

Best regards,
Tom Turelinckx




--
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs

"Tom Turelinckx" <tom@turelinckx.be> writes:
> But I need more information / pointers to documentation on how to find and provide the information you need, as I
> have no experience with looking at assembly code.

The way to get an assembly code file is to substitute -S for -c in the
compile command, and also remove any "-o file" option.  So on my machine,
in an already built PG tree, I check what switches we're using:

$ cd src/backend/utils/adt
$ rm formatting.o
$ make formatting.o
gcc -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Wendif-labels -Wmissing-format-attribute
-Wformat-security -fno-strict-aliasing -fwrapv -g -O2 -I../../../../src/include -D_GNU_SOURCE    -c -o formatting.o
formatting.c

Now I copy-and-paste all the switches except -c and -o:

$ gcc -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Wendif-labels
-Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -g -O2 -I../../../../src/include -D_GNU_SOURCE
-S formatting.c

Note it's important that -g be one of the switches, else you don't get
line number annotations in the assembly.

Now I have a formatting.s file with contents like

.L275:
.LBE196:
.LBB197:
.LBB193:
	.loc 1 2320 0
	leal    -1(%rcx), %edi
	cmpl    $1, %edi
	jbe    .L278
	.loc 1 2322 0
	cmpl    $3, %ecx
	.p2align 4,,2
	je    .L279
.LVL216:
.L246:
	.loc 1 2325 0
	movq    (%rdx), %rax
	testq    %rax, %rax
	je    .L244
	.loc 1 2331 0
	leaq    1(%r14), %rdi
	movq    %r14, 48(%rsp)
	movq    %rbx, 64(%rsp)
	.loc 1 2325 0
	xorl    %r10d, %r10d
	.loc 1 2331 0
	movq    %rdx, %rbx

The important part of this for your purposes is the ".loc" annotations,
which indicate the source line number the following code was generated
from.  Notice that it's not unusual for the compiler to rearrange code so
that instructions from different lines are interspersed --- here we
can see that lines 2325 and 2331 got mingled together.  So there might
not be only one .loc annotation for the line where the crash is being
reported.  Anyway, find those annotation(s) and send us all the text
for that area and maybe a few dozen lines on either side.
        regards, tom lane


--
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs

Re: [BUGS] Bus error in formatting.c NUM_numpart_to_char (9.4.12, 9.6.3, sparc)

From
"Tom Turelinckx"
Date:
Tom Lane wrote:

> Anyway, find those annotation(s) and send us all the text for that area and maybe a few dozen lines on either side.

Summary for the current issue, against 9.4.12.

Clean pgdg 9.4.12 fails:

regression.out:
    numeric                  ... FAILED (test process exited with exit code 2)

regression.diffs:
 SELECT '' AS to_char_6,  to_char(val, 'FMS9999999999999999.999999999999999')    FROM num_data;
! server closed the connection unexpectedly
!       This probably means the server terminated abnormally
!       before or while processing the request.
! connection to server was lost

postmaster.log:

LOG:  server process (PID 15999) was terminated by signal 10: Bus error
DETAIL:  Failed process was running: SELECT '' AS to_char_6,  to_char(val, 'FMS9999999999999999.999999999999999')
FROM num_data;

Crash location:

Reading symbols from /home/turelto/src/original/postgresql-9.4-9.4.12/build/src/backend/postgres...done.
[New LWP 15999]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/sparc-linux-gnu/libthread_db.so.1".
Core was generated by `postgres: turelto regression [local] SELECT                                   '.
Program terminated with signal 10, Bus error.
#0  NUM_numpart_to_char (id=3, Np=0xff852a34)   at
/home/turelto/src/original/postgresql-9.4-9.4.12/build/../src/backend/utils/adt/formatting.c:4419
4419                            if (Np->Num->lsign == NUM_LSIGN_PRE)
(gdb) l
4414                    (Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start ==
Np->num_curr)) &&
4415                    (IS_PREDEC_SPACE(Np) == FALSE || (Np->last_relevant && *Np->last_relevant == '.')))
4416            {
4417                    if (IS_LSIGN(Np->Num))
4418                    {
4419                            if (Np->Num->lsign == NUM_LSIGN_PRE)
4420                            {
4421                                    if (Np->sign == '-')
4422                                            strcpy(Np->inout_p, Np->L_negative_sign);
4423                                    else
(gdb)

Assembly snippet attached as "original-9.4.12-snippet.s" from file generated with:

gcc -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Wendif-labels -Wmissing-format-attribute
-Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -g -g -O2 -fstack-protector
--param=ssp-buffer-size=4 -Wformat -Werror=format-security -I/usr/include/mit-krb5  -DLINUX_OOM_SCORE_ADJ=0
-I../../../../src/include -I/home/turelto/src/original/postgresql-9.4-9.4.12/build/../src/include -D_FORTIFY_SOURCE=2
-D_GNU_SOURCE -I/usr/include/libxml2  -I/usr/include/tcl8.5  -S
/home/turelto/src/original/postgresql-9.4-9.4.12/build/../src/backend/utils/adt/formatting.c



Patched pgdg 9.4.12 passes all tests.

Applied patch attached as "patch" (reverts commit 20f870f):

Index: postgresql-9.4-9.4.12/src/backend/utils/adt/formatting.c
===================================================================
--- postgresql-9.4-9.4.12.orig/src/backend/utils/adt/formatting.c    2017-05-08 23:19:04.000000000 +0200
+++ postgresql-9.4-9.4.12/src/backend/utils/adt/formatting.c    2017-06-22 09:20:17.000000000 +0200
@@ -4173,12 +4173,12 @@         (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???");#endif
+    if (*Np->inout_p == ' ')
+        Np->inout_p++;
+#define OVERLOAD_TEST    (Np->inout_p >= Np->inout + input_len)#define AMOUNT_TEST(_s)
(input_len-(Np->inout_p-Np->inout)>= _s) 
-    if (OVERLOAD_TEST)
-        return;
-    if (*Np->inout_p == ' ')        Np->inout_p++;
@@ -4316,7 +4316,7 @@         * next char is not digit         */        if (IS_LSIGN(Np->Num) && isread &&
-            (Np->inout_p + 1) < Np->inout + input_len &&
+            (Np->inout_p + 1) <= Np->inout + input_len &&            !isdigit((unsigned char) *(Np->inout_p + 1)))
  {            int            x; 

Assembly snippet attached as "patched-9.4.12-snippet.s" from file generated with:

gcc -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Wendif-labels -Wmissing-format-attribute
-Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -g -g -O2 -fstack-protector
--param=ssp-buffer-size=4 -Wformat -Werror=format-security -I/usr/include/mit-krb5  -DLINUX_OOM_SCORE_ADJ=0
-I../../../../src/include -I/home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/include -D_FORTIFY_SOURCE=2
-D_GNU_SOURCE -I/usr/include/libxml2  -I/usr/include/tcl8.5  -S
/home/turelto/src/tmp/postgresql-9.4-9.4.12/build/../src/backend/utils/adt/formatting.c

The line number where the crash is being reported before patching (4419) doesn't change after patching, and occurs in
only one ".loc" annotation, before and after patching.

I will try to do the same for the issue from last year, replying to that thread.

Best regards,
Tom Turelinckx


-- 
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs

"Tom Turelinckx" <tom@turelinckx.be> writes:
> Tom Lane wrote:
>> Anyway, find those annotation(s) and send us all the text for that area and maybe a few dozen lines on either side.

> Summary for the current issue, against 9.4.12.

Hm.  The code seems about the same except that the compiler has changed
a few register assignments.  In both cases, the crash has to be coming
from the load instruction in line 4419, unless gdb is totally lying to
us about where the crash is.  And that register was loaded up at line
4389:
    .loc 1 4389 0
    ld    [%fp-80], %g3
    ld    [%g3+12], %g2
    ...
    .loc 1 4419 0
    ld    [%g3+8], %g2
    cmp    %g2, -1

(The "patched" code uses %g4 instead, but otherwise is the same.)
Now, the value loaded has to be a valid pointer, because the load
from [%g3+12] didn't crash.  And nothing in the straight-line
code sequence changed %g3.  But we aren't seeing all of the complicated
if-test at line 4413.  It looks to me like the compiler has put some
of it out-of-line, at labels .LL697 and .LL726 (or .LL692 and .LL693
in the "patched" assembly).  I am guessing that some part of those
code sequences must be trashing %g3 before jumping back to this
sequence at .LL553 or .LL715.

Maybe you could extract those bits too?  Or if you prefer, just send me
the whole .s files off-list.
        regards, tom lane


--
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs

"Tom Turelinckx" <tom@turelinckx.be> writes:
>> Maybe you could extract those bits too?  Or if you prefer, just send me the whole .s files off-list.

> Whole .s files attached.

Thanks.  The short of it seems to be that this is a compiler bug.
In the "original" code, the out-of-line segment at .LL726 is
    .loc 1 4415 0
    ldsb    [%g4], %g4
.LL726:
    cmp    %g4, 48
    bne,pt    %icc, .LL715
     andcc    %g2, 64, %g0
    ld    [%g3+4], %g4
    cmp    %g4, 0
    be,pt    %icc, .LL715
     andcc   %g2, 64, %g0
    ld    [%fp-24], %g4
    cmp    %g4, 0
    be,pn    %icc, .LL727
     add    %l7, -2, %g3    <--- trashes %g3
    ldsb    [%g4], %g4
    cmp    %g4, 46
    bne,a,pt %icc, .LL748
     xor    %l7, 6, %g2
    .loc 1 4417 0
    ba,pt    %xcc, .LL715    <--- returns to .LL715 which needs %g3
     andcc    %g2, 64, %g0

The corresponding part in the "patched" code is

.LL693:
.LLBE547:
.LLBE578:
.LLBB579:
.LLBB556:
    .loc 1 4415 0
    cmp    %o7, 48
    bne,pt    %icc, .LL712
     andcc    %g3, 64, %g0
    ld    [%g4+4], %o7
    cmp    %o7, 0
    be,pt    %icc, .LL712
     andcc    %g3, 64, %g0
    ld    [%fp-24], %o7
    cmp    %o7, 0
    be,a,pn    %icc, .LL722
     add    %g1, -2, %g4    <--- store to %g4 is annulled if no branch
    ldsb    [%o7], %o7
    cmp    %o7, 46
    bne,a,pt %icc, .LL722
     add    %g1, -2, %g4    <--- store to %g4 is annulled if no branch
    .loc 1 4417 0
    ba,pt    %xcc, .LL712    <--- returns to .LL712 which needs %g4
     andcc    %g3, 64, %g0

In short, the compiler is trying to hoist the first instruction for line
4456 into the delay slot of that branch to .LL727, and it's getting it
wrong.  This would've been fine if the compiler had remembered to put the
"a" (annul) flag on the branch instruction, but it forgot.

I think you need to file a bug with the gcc maintainers.  No idea if
there's anything you can give them that's shorter than this full
file, but maybe they won't care.

As a short-term workaround, reducing the -O level might help.
Or perhaps there's a different gcc version you could use?
        regards, tom lane


--
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs

What version of GCC are you testing with?


-- 
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs

Re: [BUGS] Bus error in formatting.c NUM_numpart_to_char (9.4.12, 9.6.3, sparc)

From
"Tom Turelinckx"
Date:
Tom Lane wrote:

> Or perhaps there's a different gcc version you could use?

Greg Stark wrote:

> What version of GCC are you testing with?

Wheezy on sparc has gcc-4.4 (4.4.7), gcc-4.6 (4.6.3) and gcc-4.7 (4.7.2), with gcc-4.6 being the default hence the one
I was using.

Switching to gcc-4.7 resolved this issue, as well as the failing object_address test against 9.6.3. So, with gcc-4.7
postgresql-9.4.12, postgresql-9.6.3 and postgresql-10~beta1 all build successfully and pass all tests on sparc.

Switching to gcc-4.7 does not solve the alignment issue in 9.4.7 (resolved in 9.4.8) discussed here:

https://www.postgresql.org/message-id/20160413094117.GC21485@msg.credativ.de

Tom Lane wrote:

> We never did get a clear explanation of why that crashed on Sparc.
> I hypothesized over-aggressive compiler assumptions about alignment,
> but there was no convincing evidence for that.  It might be worth taking
> a look at the assembly code immediately around the crash point, especially
> if you could also get the corresponding code from 9.4.8.

I can reproduce that crash against 9.4.7 with both gcc-4.6 and gcc-4.7, and will try to provide the relevant assembly
snippets.

Best regards,
Tom Turelinckx




--
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs

Re: [BUGS] Bus error in formatting.c NUM_numpart_to_char (9.4.12, 9.6.3, sparc)

From
"Tom Turelinckx"
Date:
Tom Turelinckx wrote:

> Switching to gcc-4.7 does not solve the alignment issue in 9.4.7 (resolved in 9.4.8) discussed here:
>
> https://www.postgresql.org/message-id/20160413094117.GC21485@msg.credativ.de
>
> Tom Lane wrote:
>
> > We never did get a clear explanation of why that crashed on Sparc.

With gcc 4.7, against postgresql 9.4.7.

Clean 9.4.7 fails contrib/test-decoding:

LOG:  server process (PID 18295) was terminated by signal 10: Bus error
DETAIL:  Failed process was running: SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL,
'include-xids', '0', 'skip-empty-xacts', '1');

Reading symbols from /home/turelto/src/947/original/postgresql-9.4-9.4.7/build/src/backend/postgres...done.
[New LWP 18295]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/sparc-linux-gnu/libthread_db.so.1".
Core was generated by `postgres: turelto regression [local] SELECT                                   '.
Program terminated with signal 10, Bus error.
#0  ReorderBufferRestoreChange (data=0x5e1acf "", txn=0x6143a0, rb=0x614318)   at
/home/turelto/src/947/original/postgresql-9.4-9.4.7/build/../src/backend/replication/logical/reorderbuffer.c:2327
2327                                    Size            tuplelen = ((HeapTuple) data)->t_len;
(gdb) l
2322                                    data += tuplelen;
2323                            }
2324
2325                            if (change->data.tp.newtuple)
2326                            {
2327                                    Size            tuplelen = ((HeapTuple) data)->t_len;
2328
2329                                    change->data.tp.newtuple =
2330                                            ReorderBufferGetTupleBuf(rb, tuplelen - offsetof(HeapTupleHeaderData,
t_bits));
2331

After applying this commit from 9.4.8 against 9.4.7, patched 9.4.7 passes all tests:

https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=0045691

Assembly snippet around line 2327 before patching:

.LBB695:
.LBB694:
    .loc 3 52 0
    mov    20, %o2
    add    %o0, 4, %o0
.LVL301:
    call    memcpy, 0
     add    %l6, %l4, %l7
.LVL302:
.LBE694:
.LBE695:
    .loc 1 2318 0
    ld    [%l5+28], %g2
.LBB696:
.LBB697:
    .loc 3 52 0
    mov    %l6, %o1
    mov    %l4, %o2
.LBE697:
.LBE696:
    .loc 1 2318 0
    add    %g2, 35, %g1
    and    %g1, -8, %g1
    .loc 1 2317 0
    st    %g1, [%g2+20]
.LVL303:
    .loc 1 2321 0
    ld    [%l5+28], %g1
.LBB699:
.LBB698:
    .loc 3 52 0
    call    memcpy, 0
     ld    [%g1+20], %o0
.LVL304:
.LBE698:
.LBE699:
.LBE691:
    .loc 1 2325 0
    ld    [%l5+32], %g1
.L355:
    cmp    %g1, 0
    be,a,pn    %icc, .L356
     ld    [%i1+84], %g2
.LBB700:
    .loc 1 2327 0
    ld    [%l7], %l6
.LVL305:
    .loc 1 2330 0
    mov    %i0, %o0
    call    ReorderBufferGetTupleBuf, 0
     add    %l6, -23, %o1
.LVL306:
    .loc 1 2329 0
    st    %o0, [%l5+32]
.LVL307:
.LBB701:
.LBB702:
    .loc 3 52 0
    mov    %l7, %o1
    mov    20, %o2
.LVL308:
    call    memcpy, 0
     add    %o0, 4, %o0
.LVL309:
.LBE702:
.LBE701:
    .loc 1 2339 0
    ld    [%l5+32], %g1
.LBB703:
.LBB704:
    .loc 3 52 0
    add    %l7, 20, %o1
.LVL310:
    mov    %l6, %o2
.LBE704:
.LBE703:
    .loc 1 2339 0
    add    %g1, 35, %g2
    and    %g2, -8, %g2
    .loc 1 2338 0
    st    %g2, [%g1+20]
.LVL311:
    .loc 1 2342 0
    ld    [%l5+32], %g1
.LBB706:
.LBB705:
    .loc 3 52 0
    call    memcpy, 0
     ld    [%g1+20], %o0
.LVL312:
.LBE705:
.LBE706:
.LBE700:
.LBB707:
.LBB708:

Assembly snippet after patching:

.LBB699:
.LBB698:
    .loc 3 52 0
    mov    20, %o2
    add    %o0, 4, %o0
.LVL301:
    call    memcpy, 0
     add    %l6, %l4, %l7
.LVL302:
.LBE698:
.LBE699:
    .loc 1 2322 0
    ld    [%l5+28], %g2
.LBB700:
.LBB701:
    .loc 3 52 0
    mov    %l6, %o1
    mov    %l4, %o2
.LBE701:
.LBE700:
    .loc 1 2322 0
    add    %g2, 35, %g1
    and    %g1, -8, %g1
    .loc 1 2321 0
    st    %g1, [%g2+20]
.LVL303:
    .loc 1 2325 0
    ld    [%l5+28], %g1
.LBB703:
.LBB702:
    .loc 3 52 0
    call    memcpy, 0
     ld    [%g1+20], %o0
.LVL304:
.LBE702:
.LBE703:
.LBE695:
    .loc 1 2329 0
    ld    [%l5+32], %g1
.L355:
    cmp    %g1, 0
    be,a,pn    %icc, .L356
     ld    [%i1+84], %g2
.LVL305:
.LBB704:
.LBB705:
.LBB706:
    .loc 3 52 0
    ldub    [%l7], %g1
.LBE706:
.LBE705:
    .loc 1 2338 0
    mov    %i0, %o0
.LBB708:
.LBB707:
    .loc 3 52 0
    stb    %g1, [%fp-1036]
.LVL306:
    ldub    [%l7+1], %g1
    stb    %g1, [%fp-1035]
    ldub    [%l7+2], %g1
    stb    %g1, [%fp-1034]
    ldub    [%l7+3], %g1
    stb    %g1, [%fp-1033]
.LBE707:
.LBE708:
    .loc 1 2338 0
    ld    [%fp-1036], %o1
    call    ReorderBufferGetTupleBuf, 0
     add    %o1, -23, %o1
.LVL307:
    .loc 1 2337 0
    st    %o0, [%l5+32]
.LVL308:
.LBB709:
.LBB710:
    .loc 3 52 0
    mov    %l7, %o1
    mov    20, %o2
.LVL309:
    call    memcpy, 0
     add    %o0, 4, %o0
.LVL310:
.LBE710:
.LBE709:
    .loc 1 2347 0
    ld    [%l5+32], %g1
.LBB711:
.LBB712:
    .loc 3 52 0
    add    %l7, 20, %o1
.LVL311:
    ld    [%fp-1036], %o2
.LBE712:
.LBE711:
    .loc 1 2347 0
    add    %g1, 35, %g2
    and    %g2, -8, %g2
    .loc 1 2346 0
    st    %g2, [%g1+20]
.LVL312:
    .loc 1 2350 0
    ld    [%l5+32], %g1
.LBB714:
.LBB713:
    .loc 3 52 0
    call    memcpy, 0
     ld    [%g1+20], %o0
.LVL313:
.LBE713:
.LBE714:
.LBE704:
.LBB715:
.LBB716:

Best regards,
Tom Turelinckx




--
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs

On 27 June 2017 at 13:05, Tom Turelinckx <tom@turelinckx.be> wrote:
>
>> What version of GCC are you testing with?
>
> Wheezy on sparc has gcc-4.4 (4.4.7), gcc-4.6 (4.6.3) and gcc-4.7 (4.7.2), with gcc-4.6 being the default hence the
> one I was using.
 


Well that's curious.

I revived build farm member burbot which is an Ultra 5 -- also running
wheezy and it didn't report any issues. It's using gcc 4.6 as well:

https://buildfarm.postgresql.org/cgi-bin/show_history.pl?nm=burbot&br=REL9_6_STABLE

-- 
greg


-- 
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs

Re: [BUGS] Bus error in formatting.c NUM_numpart_to_char (9.4.12, 9.6.3, sparc)

From
"Tom Turelinckx"
Date:
Greg Stark wrote:

> I revived build farm member burbot which is an Ultra 5 -- also running wheezy

Thanks for that!

> and it didn't report any issues. It's using gcc 4.6 as well:

With default settings, the latest (2017-06-13) buildfarm client builds REL9_4_STABLE successfully for me as well (on
wheezy on sparc), passing all tests, with gcc 4.6.

However, I was building from the Debian source packages in the pgdg repository (https://wiki.postgresql.org/wiki/Apt)
and their default settings are different from those of the buildfarm client. I've been trying to reconfigure a build
farm client such that it would build binaries identical to the Debian source package build (on wheezy on sparc), but I
haven't quite succeeded yet.

I did, however, figure out the minimal change to the buildfarm client default settings that makes the numeric
regression test fail:

*** build-farm.conf.sample      2017-06-13 17:32:45.000000000 +0200
--- build-farm.conf.fails.minimal       2017-06-29 11:43:40.000000000 +0200
***************
*** 188,194 ****
      config_opts =>
      [
          qw(
-           --enable-cassert
            --enable-debug
            --enable-nls
            --with-perl
--- 188,193 ----

With gcc 4.6, building without --enable-cassert will make the numeric regression test fail, while building with
--enable-cassert will make the build pass all tests. With gcc 4.7, the build will pass all tests in both cases. Tested
against REL9_4_STABLE only for the moment.

FWIW, this is where I've gotten with getting the buildfarm client to build as close as possible to the Debian source
package (9.4.12 on wheezy on sparc), but it's not quite identical yet:

*** build-farm.conf.sample      2017-06-13 17:32:45.000000000 +0200
--- build-farm.conf.debian      2017-06-29 10:29:26.000000000 +0200
***************
*** 176,181 ****
--- 176,184 ----          # comment out if not using ccache         CC => 'ccache gcc',
+         PYTHON => '/usr/bin/python',
+         CFLAGS => '-g -O2 -fstack-protector --param=ssp-buffer-size=4 -Wformat -Werror=format-security
-I/usr/include/mit-krb5 -DLINUX_OOM_SCORE_ADJ=0 -D_FORTIFY_SOURCE=2', 
+         LDFLAGS => '-Wl,-z,relro -Wl,-z,now -Wl,--as-needed -L/usr/lib/mit-krb5 -L/usr/lib/sparc-linux-gnu/mit-krb5',
   },      # don't use --prefix or --with-pgport here 
***************
*** 188,204 ****      config_opts =>[         qw(
!           --enable-cassert
!           --enable-debug
!           --enable-nls           --with-perl           --with-python
!           --with-tcl
!           --with-gssapi           --with-openssl
-           --with-ldap           --with-libxml           --with-libxslt           )     ],
--- 191,217 ----      config_opts =>[         qw(
!           --with-tcl           --with-perl           --with-python
!           --with-pam           --with-openssl           --with-libxml           --with-libxslt
+           --with-tclconfig=/usr/lib/tcl8.5
+           --with-includes=/usr/include/tcl8.5
+           --enable-nls
+           --enable-integer-datetimes
+           --enable-thread-safety
+           --enable-tap-tests
+           --enable-debug
+           --disable-rpath
+           --with-uuid=e2fs
+           --with-gnu-ld
+           --with-system-tzdata=/usr/share/zoneinfo
+           --with-krb5
+           --with-gssapi
+           --with-ldap           )     ],

Best regards,
Tom Turelinckx




--
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs