Re: Index split WAL reduction - Mailing list pgsql-patches
From | Heikki Linnakangas |
---|---|
Subject | Re: Index split WAL reduction |
Date | |
Msg-id | 459A8511.9050901@enterprisedb.com Whole thread Raw |
In response to | Re: Index split WAL reduction ("Heikki Linnakangas" <heikki@enterprisedb.com>) |
Responses |
Re: Index split WAL reduction
Re: Index split WAL reduction Re: Index split WAL reduction |
List | pgsql-patches |
Here's an updated patch that fixes a bug with full_page_writes and an alignment issue in the replay code. I also added a new test case to crashtest.sh that exercises the fixed bug. Has anyone looked at this? I now consider it ready for committing, if no one sees a problem with it. Here's the original description of the patch: > Currently, an index split writes all the data on the split page to > WAL. That's a lot of WAL traffic. The tuples that are copied to the > right page need to be WAL-logged, but the tuples that stay on the > original page don't. > > Here's a patch to do that. It needs further testing; I have used the > attached crude crashtest.sh to test the basics, but we need to test > the more obscure cases, like splitting a non-leaf or root page. > > On a test case that inserts 10000 rows in increasing key order with a > 100-character-wide text field as the key, the patch reduced the total > generated WAL traffic from 45MB to 33MB, or ~25%. Your mileage may > vary, depending on the tuple and key sizes and the order of inserts. 
-- Heikki Linnakangas EnterpriseDB http://www.enterprisedb.com Index: src/backend/access/nbtree/nbtinsert.c =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/access/nbtree/nbtinsert.c,v retrieving revision 1.146 diff -c -r1.146 nbtinsert.c *** src/backend/access/nbtree/nbtinsert.c 11 Nov 2006 01:14:18 -0000 1.146 --- src/backend/access/nbtree/nbtinsert.c 2 Jan 2007 15:58:59 -0000 *************** *** 733,738 **** --- 733,739 ---- rightoff; OffsetNumber maxoff; OffsetNumber i; + bool isroot; rbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); origpage = BufferGetPage(buf); *************** *** 747,752 **** --- 748,755 ---- lopaque = (BTPageOpaque) PageGetSpecialPointer(leftpage); ropaque = (BTPageOpaque) PageGetSpecialPointer(rightpage); + isroot = P_ISROOT(oopaque); + /* if we're splitting this page, it won't be the root when we're done */ /* also, clear the SPLIT_END and HAS_GARBAGE flags in both pages */ lopaque->btpo_flags = oopaque->btpo_flags; *************** *** 926,986 **** MarkBufferDirty(sbuf); } /* XLOG stuff */ if (!rel->rd_istemp) { xl_btree_split xlrec; uint8 xlinfo; XLogRecPtr recptr; ! XLogRecData rdata[4]; ! xlrec.target.node = rel->rd_node; ! ItemPointerSet(&(xlrec.target.tid), itup_blkno, itup_off); if (newitemonleft) ! xlrec.otherblk = BufferGetBlockNumber(rbuf); else ! xlrec.otherblk = BufferGetBlockNumber(buf); ! xlrec.leftblk = lopaque->btpo_prev; ! xlrec.rightblk = ropaque->btpo_next; ! xlrec.level = lopaque->btpo.level; ! /* * Direct access to page is not good but faster - we should implement * some new func in page API. Note we only store the tuples * themselves, knowing that the item pointers are in the same order * and can be reconstructed by scanning the tuples. See comments for * _bt_restore_page(). */ ! xlrec.leftlen = ((PageHeader) leftpage)->pd_special - ! ((PageHeader) leftpage)->pd_upper; ! rdata[0].data = (char *) &xlrec; ! rdata[0].len = SizeOfBtreeSplit; ! 
rdata[0].buffer = InvalidBuffer; ! rdata[0].next = &(rdata[1]); ! ! rdata[1].data = (char *) leftpage + ((PageHeader) leftpage)->pd_upper; ! rdata[1].len = xlrec.leftlen; ! rdata[1].buffer = InvalidBuffer; ! rdata[1].next = &(rdata[2]); ! ! rdata[2].data = (char *) rightpage + ((PageHeader) rightpage)->pd_upper; ! rdata[2].len = ((PageHeader) rightpage)->pd_special - ((PageHeader) rightpage)->pd_upper; ! rdata[2].buffer = InvalidBuffer; ! rdata[2].next = NULL; if (!P_RIGHTMOST(ropaque)) { ! rdata[2].next = &(rdata[3]); ! rdata[3].data = NULL; ! rdata[3].len = 0; ! rdata[3].buffer = sbuf; ! rdata[3].buffer_std = true; ! rdata[3].next = NULL; } ! if (P_ISROOT(oopaque)) xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L_ROOT : XLOG_BTREE_SPLIT_R_ROOT; else xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L : XLOG_BTREE_SPLIT_R; --- 929,1044 ---- MarkBufferDirty(sbuf); } + /* + * By here, the original data page has been split into two new halves, and + * these are correct. The algorithm requires that the left page never + * move during a split, so we copy the new left page back on top of the + * original. Note that this is not a waste of time, since we also require + * (in the page management code) that the center of a page always be + * clean, and the most efficient way to guarantee this is just to compact + * the data by reinserting it into a new left page. (XXX the latter + * comment is probably obsolete.) + * + * We need to do this before writing the WAL record, so that XLogInsert can + * WAL log an image of the page if necessary. + */ + PageRestoreTempPage(leftpage, origpage); + /* XLOG stuff */ if (!rel->rd_istemp) { xl_btree_split xlrec; uint8 xlinfo; XLogRecPtr recptr; ! XLogRecData rdata[6]; ! XLogRecData *lastrdata; ! xlrec.node = rel->rd_node; ! xlrec.leftsib = BufferGetBlockNumber(buf); ! xlrec.rightsib = BufferGetBlockNumber(rbuf); ! xlrec.firstright = firstright; ! xlrec.rnext = ropaque->btpo_next; ! xlrec.level = lopaque->btpo.level; ! ! 
rdata[0].data = (char *) &xlrec; ! rdata[0].len = SizeOfBtreeSplit; ! rdata[0].buffer = InvalidBuffer; ! ! lastrdata = &rdata[0]; ! ! /* Log downlink on non-leaf pages. */ ! if (lopaque->btpo.level > 0) ! { ! lastrdata->next = lastrdata + 1; ! lastrdata++; ! ! lastrdata->data = (char *) &newitem->t_tid.ip_blkid; ! lastrdata->len = sizeof(BlockIdData); ! lastrdata->buffer = InvalidBuffer; ! } ! ! /* Log the new item, if it was inserted on the left page. If it was ! * put on the right page, we don't need to explicitly WAL log it ! * because it's included with all the other items on the right page. ! */ ! lastrdata->next = lastrdata + 1; ! lastrdata++; if (newitemonleft) ! { ! lastrdata->data = (char *) &newitemoff; ! lastrdata->len = sizeof(OffsetNumber); ! lastrdata->buffer = buf; /* backup block 1 */ ! lastrdata->buffer_std = true; ! ! lastrdata->next = lastrdata + 1; ! lastrdata++; ! lastrdata->data = (char *)newitem; ! lastrdata->len = newitemsz; ! lastrdata->buffer = buf; /* backup block 1 */ ! lastrdata->buffer_std = true; ! } else ! { ! lastrdata->data = NULL; ! lastrdata->len = 0; ! lastrdata->buffer = buf; /* backup block 1 */ ! lastrdata->buffer_std = true; ! } ! /* Log the contents of the right page in the format understood by ! * _bt_restore_page(). We set lastrdata->buffer to InvalidBuffer, ! * because we're going to recreate the whole page anyway. ! * * Direct access to page is not good but faster - we should implement * some new func in page API. Note we only store the tuples * themselves, knowing that the item pointers are in the same order * and can be reconstructed by scanning the tuples. See comments for * _bt_restore_page(). */ ! lastrdata->next = lastrdata + 1; ! lastrdata++; ! lastrdata->data = (char *) rightpage + ((PageHeader) rightpage)->pd_upper; ! lastrdata->len = ((PageHeader) rightpage)->pd_special - ! ((PageHeader) rightpage)->pd_upper; ! 
lastrdata->buffer = InvalidBuffer; + /* Log the right sibling, because we've changed it's prev-pointer. */ if (!P_RIGHTMOST(ropaque)) { ! lastrdata->next = lastrdata + 1; ! lastrdata++; ! ! lastrdata->data = NULL; ! lastrdata->len = 0; ! lastrdata->buffer = sbuf; /* backup block 2 */ ! lastrdata->buffer_std = true; } ! lastrdata->next = NULL; ! ! if (isroot) xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L_ROOT : XLOG_BTREE_SPLIT_R_ROOT; else xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L : XLOG_BTREE_SPLIT_R; *************** *** 998,1021 **** } } - /* - * By here, the original data page has been split into two new halves, and - * these are correct. The algorithm requires that the left page never - * move during a split, so we copy the new left page back on top of the - * original. Note that this is not a waste of time, since we also require - * (in the page management code) that the center of a page always be - * clean, and the most efficient way to guarantee this is just to compact - * the data by reinserting it into a new left page. (XXX the latter - * comment is probably obsolete.) - * - * It's a bit weird that we don't fill in the left page till after writing - * the XLOG entry, but not really worth changing. Note that we use the - * origpage data (specifically its BTP_ROOT bit) while preparing the XLOG - * entry, so simply reshuffling the code won't do. 
- */ - - PageRestoreTempPage(leftpage, origpage); - END_CRIT_SECTION(); /* release the old right sibling */ --- 1056,1061 ---- Index: src/backend/access/nbtree/nbtxlog.c =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/access/nbtree/nbtxlog.c,v retrieving revision 1.39 diff -c -r1.39 nbtxlog.c *** src/backend/access/nbtree/nbtxlog.c 1 Nov 2006 19:43:17 -0000 1.39 --- src/backend/access/nbtree/nbtxlog.c 2 Jan 2007 16:02:39 -0000 *************** *** 264,385 **** { xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record); Relation reln; ! BlockNumber targetblk; ! OffsetNumber targetoff; ! BlockNumber leftsib; ! BlockNumber rightsib; ! BlockNumber downlink = 0; ! Buffer buffer; ! Page page; ! BTPageOpaque pageop; ! reln = XLogOpenRelation(xlrec->target.node); ! targetblk = ItemPointerGetBlockNumber(&(xlrec->target.tid)); ! targetoff = ItemPointerGetOffsetNumber(&(xlrec->target.tid)); ! leftsib = (onleft) ? targetblk : xlrec->otherblk; ! rightsib = (onleft) ? xlrec->otherblk : targetblk; ! /* Left (original) sibling */ ! buffer = XLogReadBuffer(reln, leftsib, true); ! Assert(BufferIsValid(buffer)); ! page = (Page) BufferGetPage(buffer); ! _bt_pageinit(page, BufferGetPageSize(buffer)); ! pageop = (BTPageOpaque) PageGetSpecialPointer(page); ! pageop->btpo_prev = xlrec->leftblk; ! pageop->btpo_next = rightsib; ! pageop->btpo.level = xlrec->level; ! pageop->btpo_flags = (xlrec->level == 0) ? BTP_LEAF : 0; ! pageop->btpo_cycleid = 0; - _bt_restore_page(page, - (char *) xlrec + SizeOfBtreeSplit, - xlrec->leftlen); ! if (onleft && xlrec->level > 0) { ! IndexTuple itup; ! /* extract downlink in the target tuple */ ! itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, targetoff)); ! downlink = ItemPointerGetBlockNumber(&(itup->t_tid)); ! Assert(ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY); } ! PageSetLSN(page, lsn); ! PageSetTLI(page, ThisTimeLineID); ! 
MarkBufferDirty(buffer); ! UnlockReleaseBuffer(buffer); ! /* Right (new) sibling */ ! buffer = XLogReadBuffer(reln, rightsib, true); ! Assert(BufferIsValid(buffer)); ! page = (Page) BufferGetPage(buffer); ! _bt_pageinit(page, BufferGetPageSize(buffer)); ! pageop = (BTPageOpaque) PageGetSpecialPointer(page); ! pageop->btpo_prev = leftsib; ! pageop->btpo_next = xlrec->rightblk; ! pageop->btpo.level = xlrec->level; ! pageop->btpo_flags = (xlrec->level == 0) ? BTP_LEAF : 0; ! pageop->btpo_cycleid = 0; ! _bt_restore_page(page, ! (char *) xlrec + SizeOfBtreeSplit + xlrec->leftlen, ! record->xl_len - SizeOfBtreeSplit - xlrec->leftlen); ! if (!onleft && xlrec->level > 0) ! { ! IndexTuple itup; - /* extract downlink in the target tuple */ - itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, targetoff)); - downlink = ItemPointerGetBlockNumber(&(itup->t_tid)); - Assert(ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY); - } ! PageSetLSN(page, lsn); ! PageSetTLI(page, ThisTimeLineID); ! MarkBufferDirty(buffer); ! UnlockReleaseBuffer(buffer); ! /* Fix left-link of right (next) page */ ! if (!(record->xl_info & XLR_BKP_BLOCK_1)) { ! if (xlrec->rightblk != P_NONE) { ! buffer = XLogReadBuffer(reln, xlrec->rightblk, false); ! if (BufferIsValid(buffer)) { ! page = (Page) BufferGetPage(buffer); ! if (XLByteLE(lsn, PageGetLSN(page))) { ! UnlockReleaseBuffer(buffer); } ! else ! { ! pageop = (BTPageOpaque) PageGetSpecialPointer(page); ! pageop->btpo_prev = rightsib; ! PageSetLSN(page, lsn); ! PageSetTLI(page, ThisTimeLineID); ! MarkBufferDirty(buffer); ! UnlockReleaseBuffer(buffer); } } } } ! /* Forget any split this insertion completes */ ! if (xlrec->level > 0) ! forget_matching_split(xlrec->target.node, downlink, false); /* The job ain't done till the parent link is inserted... */ ! log_incomplete_split(xlrec->target.node, ! leftsib, rightsib, isroot); } static void --- 264,428 ---- { xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record); Relation reln; ! 
Buffer lbuf, rbuf; ! Page lpage, rpage; ! BTPageOpaque ropaque, lopaque; ! char *datapos; ! int datalen; ! bool bkp_left = record->xl_info & XLR_BKP_BLOCK_1; ! bool bkp_nextsib = record->xl_info & XLR_BKP_BLOCK_2; ! OffsetNumber newitemoff; ! Item newitem = NULL; ! Size newitemsz = 0; ! reln = XLogOpenRelation(xlrec->node); ! datapos = (char *) xlrec + SizeOfBtreeSplit; ! datalen = record->xl_len - SizeOfBtreeSplit; ! /* Forget any split this insertion completes */ ! if (xlrec->level > 0) ! { ! BlockNumber downlink = BlockIdGetBlockNumber((BlockId) datapos); ! datapos += sizeof(BlockIdData); ! datalen -= sizeof(BlockIdData); ! ! forget_matching_split(xlrec->node, downlink, false); ! } ! /* Extract newitem and newitemoff */ ! if (!bkp_left && onleft) { ! IndexTupleData itupdata; ! /* Extract the offset of the new tuple and it's contents */ ! memcpy(&newitemoff, datapos, sizeof(OffsetNumber)); ! datapos += sizeof(OffsetNumber); ! datalen -= sizeof(OffsetNumber); ! ! newitem = datapos; ! /* Need to copy tuple header due to alignment considerations */ ! memcpy(&itupdata, datapos, sizeof(IndexTupleData)); ! newitemsz = IndexTupleDSize(itupdata); ! newitemsz = MAXALIGN(newitemsz); ! datapos += newitemsz; ! datalen -= newitemsz; } ! /* Reconstruct right (new) sibling */ ! rbuf = XLogReadBuffer(reln, xlrec->rightsib, true); ! Assert(BufferIsValid(rbuf)); ! rpage = (Page) BufferGetPage(rbuf); ! _bt_pageinit(rpage, BufferGetPageSize(rbuf)); ! ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage); ! ropaque->btpo_prev = xlrec->leftsib; ! ropaque->btpo_next = xlrec->rnext; ! ropaque->btpo.level = xlrec->level; ! ropaque->btpo_flags = (xlrec->level == 0) ? BTP_LEAF : 0; ! ropaque->btpo_cycleid = 0; ! _bt_restore_page(rpage, datapos, datalen); ! PageSetLSN(rpage, lsn); ! PageSetTLI(rpage, ThisTimeLineID); ! MarkBufferDirty(rbuf); ! /* don't release the buffer yet, because reconstructing the left sibling ! * needs to access the data on the right page ! */ ! 
/* Reconstruct left (original) sibling */ ! if(!bkp_left) { ! lbuf = XLogReadBuffer(reln, xlrec->leftsib, false); ! ! if (BufferIsValid(lbuf)) { ! lpage = (Page) BufferGetPage(lbuf); ! lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage); ! ! if (!XLByteLE(lsn, PageGetLSN(lpage))) { ! /* Remove the items from the left page that were copied to ! * right page, and add the new item if it was inserted to ! * left page. ! */ ! OffsetNumber off; ! OffsetNumber maxoff = PageGetMaxOffsetNumber(lpage); ! ItemId hiItemId; ! Item hiItem; ! for(off = maxoff ; off >= xlrec->firstright; off--) ! PageIndexTupleDelete(lpage, off); ! ! if (onleft) { ! if (PageAddItem(lpage, newitem, newitemsz, newitemoff, ! LP_USED) == InvalidOffsetNumber) ! elog(PANIC, "can't add new item to left sibling after split"); } ! /* Set high key equal to the first key on the right page */ ! hiItemId = PageGetItemId(rpage, P_FIRSTDATAKEY(ropaque)); ! hiItem = PageGetItem(rpage, hiItemId); ! if(!P_RIGHTMOST(lopaque)) ! { ! /* but remove the old high key first */ ! PageIndexTupleDelete(lpage, P_HIKEY); } + + if(PageAddItem(lpage, hiItem, ItemIdGetLength(hiItemId), + P_HIKEY, LP_USED) == InvalidOffsetNumber) + elog(PANIC, "can't add high key after split to left page"); + + /* Fix opaque fields */ + lopaque->btpo_flags = (xlrec->level == 0) ? BTP_LEAF : 0; + lopaque->btpo_next = xlrec->rightsib; + lopaque->btpo_cycleid = 0; + + PageSetLSN(lpage, lsn); + PageSetTLI(lpage, ThisTimeLineID); + MarkBufferDirty(lbuf); } + + UnlockReleaseBuffer(lbuf); } + } ! /* we no longer need the right buffer. */ ! UnlockReleaseBuffer(rbuf); ! ! /* Fix left-link of the page to the right of the new right sibling */ ! if (!bkp_nextsib && xlrec->rnext != P_NONE) ! { ! Buffer buffer = XLogReadBuffer(reln, xlrec->rnext, false); ! if (BufferIsValid(buffer)) ! { ! Page page = (Page) BufferGetPage(buffer); ! ! if (!XLByteLE(lsn, PageGetLSN(page))) ! { ! BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page); ! 
pageop->btpo_prev = xlrec->rightsib; ! ! PageSetLSN(page, lsn); ! PageSetTLI(page, ThisTimeLineID); ! MarkBufferDirty(buffer); ! } ! UnlockReleaseBuffer(buffer); ! } ! } /* The job ain't done till the parent link is inserted... */ ! log_incomplete_split(xlrec->node, ! xlrec->leftsib, xlrec->rightsib, isroot); } static void *************** *** 727,766 **** { xl_btree_split *xlrec = (xl_btree_split *) rec; ! appendStringInfo(buf, "split_l: "); ! out_target(buf, &(xlrec->target)); ! appendStringInfo(buf, "; oth %u; rgh %u", ! xlrec->otherblk, xlrec->rightblk); break; } case XLOG_BTREE_SPLIT_R: { xl_btree_split *xlrec = (xl_btree_split *) rec; ! appendStringInfo(buf, "split_r: "); ! out_target(buf, &(xlrec->target)); ! appendStringInfo(buf, "; oth %u; rgh %u", ! xlrec->otherblk, xlrec->rightblk); break; } case XLOG_BTREE_SPLIT_L_ROOT: { xl_btree_split *xlrec = (xl_btree_split *) rec; ! appendStringInfo(buf, "split_l_root: "); ! out_target(buf, &(xlrec->target)); ! appendStringInfo(buf, "; oth %u; rgh %u", ! xlrec->otherblk, xlrec->rightblk); break; } case XLOG_BTREE_SPLIT_R_ROOT: { xl_btree_split *xlrec = (xl_btree_split *) rec; ! appendStringInfo(buf, "split_r_root: "); ! out_target(buf, &(xlrec->target)); ! appendStringInfo(buf, "; oth %u; rgh %u", ! xlrec->otherblk, xlrec->rightblk); break; } case XLOG_BTREE_DELETE: --- 770,817 ---- { xl_btree_split *xlrec = (xl_btree_split *) rec; ! appendStringInfo(buf, "split_l: rel %u/%u/%u ", ! xlrec->node.spcNode, xlrec->node.dbNode, ! xlrec->node.relNode); ! appendStringInfo(buf, "left %u, right %u off %u level %u", ! xlrec->leftsib, xlrec->rightsib, ! xlrec->firstright, xlrec->level); break; } case XLOG_BTREE_SPLIT_R: { xl_btree_split *xlrec = (xl_btree_split *) rec; ! appendStringInfo(buf, "split_r: rel %u/%u/%u ", ! xlrec->node.spcNode, xlrec->node.dbNode, ! xlrec->node.relNode); ! appendStringInfo(buf, "left %u, right %u off %u level %u", ! xlrec->leftsib, xlrec->rightsib, ! 
xlrec->firstright, xlrec->level); break; } case XLOG_BTREE_SPLIT_L_ROOT: { xl_btree_split *xlrec = (xl_btree_split *) rec; ! appendStringInfo(buf, "split_l_root: rel %u/%u/%u ", ! xlrec->node.spcNode, xlrec->node.dbNode, ! xlrec->node.relNode); ! appendStringInfo(buf, "left %u, right %u off %u level %u", ! xlrec->leftsib, xlrec->rightsib, ! xlrec->firstright, xlrec->level); break; } case XLOG_BTREE_SPLIT_R_ROOT: { xl_btree_split *xlrec = (xl_btree_split *) rec; ! appendStringInfo(buf, "split_r_root: rel %u/%u/%u ", ! xlrec->node.spcNode, xlrec->node.dbNode, ! xlrec->node.relNode); ! appendStringInfo(buf, "left %u, right %u off %u level %u", ! xlrec->leftsib, xlrec->rightsib, ! xlrec->firstright, xlrec->level); break; } case XLOG_BTREE_DELETE: Index: src/include/access/nbtree.h =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/include/access/nbtree.h,v retrieving revision 1.106 diff -c -r1.106 nbtree.h *** src/include/access/nbtree.h 1 Nov 2006 19:43:17 -0000 1.106 --- src/include/access/nbtree.h 2 Jan 2007 14:07:36 -0000 *************** *** 254,260 **** * * Note: the four XLOG_BTREE_SPLIT xl_info codes all use this data record. * The _L and _R variants indicate whether the inserted tuple went into the ! * left or right split page (and thus, whether otherblk is the right or left * page of the split pair). The _ROOT variants indicate that we are splitting * the root page, and thus that a newroot record rather than an insert or * split record should follow. Note that a split record never carries a --- 254,261 ---- * * Note: the four XLOG_BTREE_SPLIT xl_info codes all use this data record. * The _L and _R variants indicate whether the inserted tuple went into the ! * left or right split page (and thus, whether newitemoff and the new item ! * are stored or not. * page of the split pair). 
The _ROOT variants indicate that we are splitting * the root page, and thus that a newroot record rather than an insert or * split record should follow. Note that a split record never carries a *************** *** 262,278 **** */ typedef struct xl_btree_split { ! xl_btreetid target; /* inserted tuple id */ ! BlockNumber otherblk; /* second block participated in split: */ ! /* first one is stored in target' tid */ ! BlockNumber leftblk; /* prev/left block */ ! BlockNumber rightblk; /* next/right block */ ! uint32 level; /* tree level of page being split */ ! uint16 leftlen; /* len of left page items below */ ! /* LEFT AND RIGHT PAGES TUPLES FOLLOW AT THE END */ } xl_btree_split; ! #define SizeOfBtreeSplit (offsetof(xl_btree_split, leftlen) + sizeof(uint16)) /* * This is what we need to know about delete of individual leaf index tuples. --- 263,283 ---- */ typedef struct xl_btree_split { ! RelFileNode node; ! BlockNumber leftsib; /* orig page / new left page */ ! BlockNumber rightsib; /* new right page */ ! OffsetNumber firstright; /* first item stored on right page */ ! BlockNumber rnext; /* next/right block pointer */ ! uint32 level; /* tree level of page being split */ ! ! /* BlockIdData downlink follows if level > 0 */ ! ! /* OffsetNumber newitemoff follows in the _L variants. */ ! /* New item follows in the _L variants */ ! /* RIGHT PAGES TUPLES FOLLOW AT THE END */ } xl_btree_split; ! #define SizeOfBtreeSplit (offsetof(xl_btree_split, level) + sizeof(uint32)) /* * This is what we need to know about delete of individual leaf index tuples.
Attachment
pgsql-patches by date: