64-bit hashjoins - Mailing list pgsql-hackers
From | Erik Riedel |
---|---|
Subject | 64-bit hashjoins |
Date | |
Msg-id | Uqy6V2G00gNtImTMwA@andrew.cmu.edu Whole thread Raw |
Responses |
Re: [HACKERS] 64-bit hashjoins
Re: [HACKERS] 64-bit hashjoins |
List | pgsql-hackers |
Platform: Alpha, Digital UNIX 4.0D Software: PostgreSQL 6.5 snaphot (11 March 1999) I have two tables as follows: Table = orderfoo +-------------------------+----------------------------+-------+ | Field | Type | Length| +-------------------------+----------------------------+-------+ | o_orderkey | int4 not null | 4 | | o_custkey | int4 not null | 4 | | o_orderstatus | char() not null | 1 | | o_totalprice | float8 not null | 8 | | o_orderdate | date | 4 | | o_orderpriority | char() not null | 15 | | o_clerk | char() not null | 15 | | o_shippriority | int4 not null | 4 | | o_comment | char() not null | 79 | +-------------------------+----------------------------+-------+ Index: orderfoo_index_ Table = customer +-------------------------+----------------------------+-------+ | Field | Type | Length| +-------------------------+----------------------------+-------+ | c_custkey | int4 not null | 4 | | c_name | char() not null | 25 | | c_address | char() not null | 40 | | c_nationkey | int4 not null | 4 | | c_phone | char() not null | 15 | | c_acctbal | float8 not null | 8 | | c_mktsegment | char() not null | 10 | | c_comment | char() not null | 117 | +-------------------------+----------------------------+-------+ Index: customer_index_ and a query: -- -- Query 3 -- select l_orderkey, sum(l_extendedprice*(1-l_discount)) as revenue, o_orderdate, o_shippriority from customer, orderfoo, lineitem where c_mktsegment = 'BUILDING' and c_custkey = o_custkey and l_orderkey = o_orderkey and o_orderdate < '1995-03-15' and l_shipdate > '1995-03-15' group by l_orderkey, o_orderdate, o_shippriority order by revenue desc, o_orderdate; whose plan includes the segment that hash joins the two tables listed above: -> Hash Join (cost=12268.29 size=5284 width=20) -> Seq Scan on orderfoo (cost=8609.00 size=72911 width=16) -> Hash (cost=0.00 size=0 width=0) -> Seq Scan on customer (cost=1031.00 size=1087 width=4) and which crashes the backend during the Hash Join with: Unaligned access pid=28933 <postgres> va=0x14027e934 pc=0x120099430 ra=0x14027e93c inst=0xb74e0010 followed swiftly by: Bus error (core dumped) (note that the unaligned access and the bus error may be unrelated events, but I suspect not - more below) we then have a dbx backtrace that shows: 0 ExecScanHashBucket(hjstate = 0x140238478, bucket = 0x14027e8d8, curtuple = 0x11fffd000, hjclauses = 0x140234118, econtext = 0x140238588) ["nodeHash.c":706, 0x120099434] 1 ExecHashJoin(node = 0x140234258) ["nodeHashjoin.c":288, 0x120099f00] 2 ExecProcNode(node= 0x140234258, parent = 0x1402347d8) ["execProcnode.c":315, 0x120090ffc] 3 ExecNestLoop(node = 0x1402347d8, parent = 0x140236108) ["nodeNestloop.c":160, 0x12009d104] 4 ExecProcNode(node = 0x1402347d8, parent = 0x140236108) ["execProcnode.c":279, 0x120090e7c] 5 createfirstrun(node = 0x140236108) ["psort.c":409, 0x1201762fc] 6 initialrun(node= 0x140236108) ["psort.c":291, 0x120176024] 7 psort_begin(node = 0x140236108, nkeys = 5159, key = 0x14023c560) ["psort.c":150, 0x120175e64] 8 ExecSort(node = 0x140236108) ["nodeSort.c":156, 0x12009e1c4] 9 ExecProcNode(node = 0x140236108,parent = 0x1402366d8) ["execProcnode.c":295, 0x120090f0c] 10 ExecGroupEveryTuple(node = 0x1402366d8) ["nodeGroup.c":104, 0x12009fab4] 11 ExecGroup(node= (nil)) ["nodeGroup.c":56, 0x12009fa0c] 12 ExecProcNode(node = 0x1402366d8, parent = 0x140237020) ["execProcnode.c":303, 0x120090f6c] 13 ExecAgg(node = 0x140237020) ["nodeAgg.c":243, 0x120097064] 14 ExecProcNode(node =0x140237020, parent = 0x140237478) ["execProcnode.c":307, 0x120090f9c] 15 createfirstrun(node = 0x140237478) ["psort.c":409, 0x1201762fc] More (n if no)? 16 initialrun(node = 0x140237478) ["psort.c":291, 0x120176024] 17 psort_begin(node = 0x140237478, nkeys =5159, key = 0x14023cd80) ["psort.c":150, 0x120175e64] 18 ExecSort(node = 0x140237478) ["nodeSort.c":156, 0x12009e1c4] 19 ExecProcNode(node = 0x140237478,parent = 0x140237478) ["execProcnode.c":295, 0x120090f0c] 20 ExecutePlan(estate = 0x140237ea8, plan = 0x140237478, direction = ForwardScanDirection, destfunc = 0x14001b118) ["execMain.c":985, 0x12008f2bc] 21 ExecutorRun(queryDesc = (nil), estate = 0x140237ea8, limoffset = 0x1, limcount = 0x14001b118) ["execMain.c":360, 0x12008e780] 22 ProcessQueryDesc(queryDesc = 0x140237e78, limoffset = (nil), limcount = (nil)) ["pquery.c":334, 0x120123a1c] 23 ProcessQuery(parsetree = 0x1401f3a88, plan = 0x140237478, dest = 536858624) ["pquery.c":377, 0x120123b1c] 24 pg_exec_query_dest(query_string = 0x11fffd7c0 = "select l_orderkey, sum(l_extendedprice*(1-l_discount)) as revenue, o_orderdate, o_shippriority from customer, orderfoo, lineitem where c\...", aclOverride = '^@') ["postgres.c":805, 0x1201217bc] 25 PostgresMain(argv = 0x11ffff808, real_argv = 0x11ffff808) ["postgres.c":703, 0x12012302c] 26 main(argv = 0x11ffff808) ["main.c":103, 0x1200ae28c] (dbx) the offending code in nodeHash.c is near line 700 as: ... if (curtuple == NULL) heapTuple = (HeapTuple) LONGALIGN(ABSADDR(bucket->top)); else heapTuple = (HeapTuple) LONGALIGN(((char *) curtuple + curtuple->t_len + HEAPTUPLESIZE)); while (heapTuple < (HeapTuple) ABSADDR(bucket->bottom)) { heapTuple->t_data = (HeapTupleHeader) ((char *) heapTuple + HEAPTUPLESIZE); inntuple = ExecStoreTuple(heapTuple, /* tuple to store */ hjstate->hj_HashTupleSlot, /*slot */ InvalidBuffer, /* tuple has no buffer */ false); /* do not pfreethis tuple */ ... it crashes at the ExecStoreTuple(). Since it gives that unaligned access error beforehand, I suspect the LONGALIGN() macros in the lines above are the culprits somehow. This takes me to include/utils/memutils.h which has: #if (defined(sun) && ! defined(sparc)) || defined(m68k) #define LONGALIGN(LEN) SHORTALIGN(LEN) #elif defined (__alpha) /* * even though "long alignment" should really be on 8-byte boundaries for * linuxalpha, we want the strictest alignmentto be on 4-byte (int) * boundaries, because otherwise things break when they try to use the * FormData_pg_* structures. --djm 12/12/96 */ #define LONGALIGN(LEN)\ (((long)(LEN) + (sizeof (int) - 1)) & ~(sizeof (int) -1)) #else #define LONGALIGN(LEN)\ (((long)(LEN) + (sizeof (long) - 1)) & ~(sizeof (long) -1)) #endif since I am __alpha (but __osf__, not linux), I get the version with the sizeof(int) instead of the sizeof(long). Can someone explain the comment from djm to me (or is djm still listening somewhere?). At first blush, I suspect that I actually _want_ it to do the latter version of LONGALIGN(), since my longs really are 8 bytes. But when I try to do that instead, I am unable to even run "initdb" - dies with an error like "attribute not found/invalid" (sorry, scrolled away the window with the actual error - I can re-create if the exact message would help anyone). Anyone have suggestions on how I might proceed? Are there known problems (or known-workings) of HashJoin on 64-bit platforms? Erik Riedel Carnegie Mellon University www.cs.cmu.edu/~riedel
pgsql-hackers by date: