From 8efe8f8f94d8f3195ba65b964799ca2c75f971fd Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Wed, 11 Sep 2019 17:46:11 -0700 Subject: [PATCH] Save original new heap TID in insert WAL record. --- src/backend/access/nbtree/nbtinsert.c | 14 ++++++++++++++ src/backend/access/nbtree/nbtxlog.c | 3 +++ src/include/access/nbtxlog.h | 4 +++- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 8fb17d6784..119e3fe5a6 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -1037,6 +1037,7 @@ _bt_insertonpg(Relation rel, Size itemsz; IndexTuple nposting = NULL; IndexTuple oposting; + ItemPointerData orig; page = BufferGetPage(buf); lpageop = (BTPageOpaque) PageGetSpecialPointer(page); @@ -1061,6 +1062,7 @@ _bt_insertonpg(Relation rel, itemsz = IndexTupleSize(itup); itemsz = MAXALIGN(itemsz); /* be safe, PageAddItem will do this but we * need to be consistent */ + memset(&orig, 0, sizeof(ItemPointerData)); /* * Do we need to split an existing posting list item? @@ -1092,6 +1094,8 @@ _bt_insertonpg(Relation rel, Assert(in_posting_offset > 0); oposting = (IndexTuple) PageGetItem(page, itemid); + /* HACK Save orig heap TID for WAL logging */ + ItemPointerCopy(&itup->t_tid, &orig); nposting = _bt_form_newposting(itup, oposting, in_posting_offset); /* Alter new item offset, since effective new item changed */ @@ -1264,6 +1268,7 @@ _bt_insertonpg(Relation rel, xlrec.offnum = itup_off; xlrec.in_posting_offset = in_posting_offset; + xlrec.orig = orig; XLogBeginInsert(); XLogRegisterData((char *) &xlrec, SizeOfBtreeInsert); @@ -1856,6 +1861,15 @@ _bt_split(Relation rel, BTScanInsert itup_key, Buffer buf, Buffer cbuf, * with all the other items on the right page. * Otherwise, save in_posting_offset and newitem to construct * replacing tuple. + * + * FIXME: The same "original new item TID vs. 
rewritten new item TID" + * issue exists here, but it is not handled yet. + * + * FIXME: Be careful about splits where the new item is also the first + * item on the right half -- in that case the posting list that we + * have to update in-place is the last item on the left half. This is hard to + * test because nbtsplitloc.c will avoid choosing a split point + * between these two. */ xlrec.in_posting_offset = InvalidOffsetNumber; if (replacepostingoff < firstright) diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index de9bc3b101..5bb38beda1 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -189,6 +189,9 @@ btree_xlog_insert(bool isleaf, bool ismeta, XLogReaderState *record) IndexTuple newitem = (IndexTuple) datapos; IndexTuple nposting; + /* Restore newitem's original heap TID, as saved by _bt_insertonpg() */ + newitem = CopyIndexTuple(newitem); + ItemPointerCopy(&xlrec->orig, &newitem->t_tid); nposting = _bt_form_newposting(newitem, oposting, xlrec->in_posting_offset); Assert(isleaf); diff --git a/src/include/access/nbtxlog.h b/src/include/access/nbtxlog.h index 075baaf6eb..2813e569dc 100644 --- a/src/include/access/nbtxlog.h +++ b/src/include/access/nbtxlog.h @@ -15,6 +15,7 @@ #include "access/xlogreader.h" #include "lib/stringinfo.h" +#include "storage/itemptr.h" #include "storage/off.h" /* @@ -74,9 +75,10 @@ typedef struct xl_btree_insert { OffsetNumber offnum; OffsetNumber in_posting_offset; + ItemPointerData orig; } xl_btree_insert; -#define SizeOfBtreeInsert (offsetof(xl_btree_insert, in_posting_offset) + sizeof(OffsetNumber)) +#define SizeOfBtreeInsert (offsetof(xl_btree_insert, orig) + sizeof(ItemPointerData)) /* * On insert with split, we save all the items going into the right sibling -- 2.17.1