Merge join performance - Mailing list pgsql-patches
From | Heikki Linnakangas |
---|---|
Subject | Merge join performance |
Date | |
Msg-id | 44EC42B8.7070708@enterprisedb.com Whole thread Raw |
Responses |
Re: Merge join performance
(Tom Lane <tgl@sss.pgh.pa.us>)
|
List | pgsql-patches |
It seems that the page-at-a-time-index-scan patch applied in the spring caused a slight performance regression to merge joins. The btree mark/restore became much more expensive, as btmarkpos now has to copy the array of item pointers retrieved from the current index page. That adds up, because merge join calls markpos for every tuple. We can make markpos fast, if we make the copy lazily in _bt_steppage, see attached patch. I did some micro-benchmarking of merge join performance, see attached test. Test results, on my laptop: 8_1_STABLE: 1.77 s HEAD, with patch: 1.65 s HEAD, without patch: 2.46 s The results are pretty stable, within 0.1 s. -- Heikki Linnakangas EnterpriseDB http://www.enterprisedb.com Index: src/backend/access/nbtree/nbtree.c =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/access/nbtree/nbtree.c,v retrieving revision 1.149 diff -c -r1.149 nbtree.c *** src/backend/access/nbtree/nbtree.c 10 May 2006 23:18:39 -0000 1.149 --- src/backend/access/nbtree/nbtree.c 23 Aug 2006 11:54:09 -0000 *************** *** 368,373 **** --- 368,374 ---- { so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData)); so->currPos.buf = so->markPos.buf = InvalidBuffer; + so->markItemIndex = -1; if (scan->numberOfKeys > 0) so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData)); else *************** *** 392,397 **** --- 393,399 ---- ReleaseBuffer(so->markPos.buf); so->markPos.buf = InvalidBuffer; } + so->markItemIndex = -1; /* * Reset the scan keys. Note that keys ordering stuff moved to _bt_first. *************** *** 430,435 **** --- 432,438 ---- ReleaseBuffer(so->markPos.buf); so->markPos.buf = InvalidBuffer; } + so->markItemIndex = -1; if (so->killedItems != NULL) pfree(so->killedItems); *************** *** 456,469 **** so->markPos.buf = InvalidBuffer; } ! /* bump pin on current buffer for assignment to mark buffer */ if (BTScanPosIsValid(so->currPos)) { ! IncrBufferRefCount(so->currPos.buf); ! memcpy(&so->markPos, &so->currPos, ! offsetof(BTScanPosData, items[1]) + ! so->currPos.lastItem * sizeof(BTScanPosItem)); ! } PG_RETURN_VOID(); } --- 459,472 ---- so->markPos.buf = InvalidBuffer; } ! /* Record the current itemIndex we're on. If we later step to next page ! * before releasing the marked position, _bt_steppage makes a full copy ! * of the currPos-struct. */ if (BTScanPosIsValid(so->currPos)) { ! so->markItemIndex = so->currPos.itemIndex; ! } else ! so->markItemIndex = -1; PG_RETURN_VOID(); } *************** *** 477,500 **** IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); BTScanOpaque so = (BTScanOpaque) scan->opaque; ! /* we aren't holding any read locks, but gotta drop the pin */ ! if (BTScanPosIsValid(so->currPos)) { ! /* Before leaving current page, deal with any killed items */ ! if (so->numKilled > 0 && ! so->currPos.buf != so->markPos.buf) ! _bt_killitems(scan, false); ! ReleaseBuffer(so->currPos.buf); ! so->currPos.buf = InvalidBuffer; ! } ! ! /* bump pin on marked buffer */ ! if (BTScanPosIsValid(so->markPos)) { ! IncrBufferRefCount(so->markPos.buf); ! memcpy(&so->currPos, &so->markPos, ! offsetof(BTScanPosData, items[1]) + ! so->markPos.lastItem * sizeof(BTScanPosItem)); } PG_RETURN_VOID(); --- 480,512 ---- IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); BTScanOpaque so = (BTScanOpaque) scan->opaque; ! if (so->markItemIndex != -1) { ! /* The restore position was on the same page. ! * Just restore the itemIndex */ ! so->currPos.itemIndex = so->markItemIndex; ! } ! else { ! /* we aren't holding any read locks, but gotta drop the pin */ ! if (BTScanPosIsValid(so->currPos)) ! { ! /* Before leaving current page, deal with any killed items */ ! if (so->numKilled > 0 && ! so->currPos.buf != so->markPos.buf) ! _bt_killitems(scan, false); ! ReleaseBuffer(so->currPos.buf); ! so->currPos.buf = InvalidBuffer; ! } ! ! if (BTScanPosIsValid(so->markPos)) ! { ! /* bump pin on marked buffer for assignment to current buffer */ ! IncrBufferRefCount(so->markPos.buf); ! memcpy(&so->currPos, &so->markPos, ! offsetof(BTScanPosData, items[1]) + ! so->markPos.lastItem * sizeof(BTScanPosItem)); ! } } PG_RETURN_VOID(); Index: src/backend/access/nbtree/nbtsearch.c =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/access/nbtree/nbtsearch.c,v retrieving revision 1.105 diff -c -r1.105 nbtsearch.c *** src/backend/access/nbtree/nbtsearch.c 7 May 2006 01:21:30 -0000 1.105 --- src/backend/access/nbtree/nbtsearch.c 23 Aug 2006 11:11:45 -0000 *************** *** 1055,1060 **** --- 1055,1074 ---- rel = scan->indexRelation; + /* Before we modify currPos, make a copy of the items if there + * was a marked position that needs them. */ + if (so->markItemIndex != -1) + { + /* bump pin on current buffer for assignment to marked buffer */ + IncrBufferRefCount(so->currPos.buf); + memcpy(&so->markPos, &so->currPos, + offsetof(BTScanPosData, items[1]) + + so->currPos.lastItem * sizeof(BTScanPosItem)); + + so->markPos.itemIndex = so->markItemIndex; + so->markItemIndex = -1; + } + if (ScanDirectionIsForward(dir)) { /* Walk right to the next page with data */ Index: src/include/access/nbtree.h =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/include/access/nbtree.h,v retrieving revision 1.103 diff -c -r1.103 nbtree.h *** src/include/access/nbtree.h 7 Aug 2006 16:57:57 -0000 1.103 --- src/include/access/nbtree.h 23 Aug 2006 11:07:47 -0000 *************** *** 441,446 **** --- 441,450 ---- /* keep these last in struct for efficiency */ BTScanPosData currPos; /* current position data */ BTScanPosData markPos; /* marked position, if any */ + int markItemIndex; /* if the marked position is on the same + page as current position, we don't use + markPos, but just keep the marked + itemIndex in markItemIndex */ } BTScanOpaqueData; typedef BTScanOpaqueData *BTScanOpaque;
Attachment
pgsql-patches by date: