From 38e676b20ab57370e3761898f3657dc64329f211 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 4 Apr 2019 18:05:48 +0300 Subject: [PATCH 2/2] Use full 64-bit XID for checking if a deleted GiST page is old enough. Otherwise, after a deleted page gets even older, it becomes unrecyclable again. B-tree has the same problem, and has had since time immemorial, but let's at least fix this in GiST, where this is new. --- src/backend/access/gist/gistutil.c | 63 ++++++++++++++++++++++---- src/backend/access/gist/gistvacuum.c | 4 +- src/backend/access/gist/gistxlog.c | 20 ++++++-- src/backend/access/rmgrdesc/gistdesc.c | 6 ++- src/include/access/gist.h | 20 +++++++- src/include/access/gist_private.h | 4 +- src/include/access/gistxlog.h | 2 +- 7 files changed, 98 insertions(+), 21 deletions(-) diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index 94b6ad6a59..f6bbc0426c 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -839,16 +839,16 @@ gistNewBuffer(Relation r) gistcheckpage(r, buffer); /* - * Otherwise, recycle it if deleted, and too old to have any processes - * interested in it. + * Otherwise, recycle it if deleted, and too old to have any + * processes interested in it. */ if (gistPageRecyclable(page)) { /* - * If we are generating WAL for Hot Standby then create a - * WAL record that will allow us to conflict with queries - * running on standby, in case they have snapshots older - * than the page's deleteXid. + * If we are generating WAL for Hot Standby then create a WAL + * record that will allow us to conflict with queries running + * on standby, in case they have snapshots older than the + * page's deleteXid. */ if (XLogStandbyInfoActive() && RelationNeedsWAL(r)) gistXLogPageReuse(r, blkno, GistPageGetDeleteXid(page)); @@ -882,9 +882,54 @@ gistNewBuffer(Relation r) bool gistPageRecyclable(Page page) { - return PageIsNew(page) || - (GistPageIsDeleted(page) && - TransactionIdPrecedes(GistPageGetDeleteXid(page), RecentGlobalXmin)); + if (PageIsNew(page)) + return true; + if (GistPageIsDeleted(page)) + { + /* + * The page was deleted, but when? If it was just deleted, a scan + * might have seen the downlink to it, and will read the page later. + * As long as that can happen, we must keep the deleted page around as + * a tombstone. + * + * Compare the deletion XID with RecentGlobalXmin. If deleteXid < + * RecentGlobalXmin, then no scan that's still in progress could have + * seen its downlink, and we can recycle it. + * + * One complication here is that the delete XID is a "full" 64-bit + * transaction ID, but RecentGlobalXmin doesn't include the epoch. So + * we first have to form a full 64-bit version of RecentGlobalXmin to + * compare with. + */ + FullTransactionId deletexid_full = GistPageGetDeleteXid(page); + FullTransactionId nextxid_full; + uint32 nextxid_epoch; + TransactionId nextxid_xid; + FullTransactionId recentxmin_full; + uint32 recentxmin_epoch; + TransactionId recentxmin_xid; + + nextxid_full = ReadNextFullTransactionId(); + nextxid_epoch = EpochFromFullTransactionId(nextxid_full); + nextxid_xid = XidFromFullTransactionId(nextxid_full); + + recentxmin_xid = RecentGlobalXmin; + if (recentxmin_xid > nextxid_xid) + recentxmin_epoch = nextxid_epoch - 1; + else + recentxmin_epoch = nextxid_epoch; + recentxmin_full = + FullTransactionIdFromEpochAndXid(recentxmin_epoch, + recentxmin_xid); + + /* + * Now we have a 64-bit version of RecentGlobalXmin. Compare deletion + * XID against it. + */ + if (FullTransactionIdPrecedes(deletexid_full, recentxmin_full)) + return true; + } + return false; } bytea * diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index e2029d842c..07096194b2 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c @@ -595,7 +595,7 @@ gistdeletepage(IndexVacuumInfo *info, GistBulkDeleteResult *stats, ItemId iid; IndexTuple idxtuple; XLogRecPtr recptr; - TransactionId txid; + FullTransactionId txid; /* * Check that the leaf is still empty and deletable. @@ -648,7 +648,7 @@ gistdeletepage(IndexVacuumInfo *info, GistBulkDeleteResult *stats, * currently in progress must have ended. (That's much more conservative * than needed, but let's keep it safe and simple.) */ - txid = ReadNewTransactionId(); + txid = ReadNextFullTransactionId(); START_CRIT_SECTION(); diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c index 4fb1855e89..37a190219e 100644 --- a/src/backend/access/gist/gistxlog.c +++ b/src/backend/access/gist/gistxlog.c @@ -701,7 +701,7 @@ gistXLogSplit(bool page_is_leaf, * downlink from the parent page. */ XLogRecPtr -gistXLogPageDelete(Buffer buffer, TransactionId xid, +gistXLogPageDelete(Buffer buffer, FullTransactionId xid, Buffer parentBuffer, OffsetNumber downlinkOffset) { gistxlogPageDelete xlrec; @@ -725,9 +725,23 @@ gistXLogPageDelete(Buffer buffer, TransactionId xid, * Write XLOG record about reuse of a deleted page. */ void -gistXLogPageReuse(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid) +gistXLogPageReuse(Relation rel, BlockNumber blkno, FullTransactionId latestRemovedXid) { gistxlogPageReuse xlrec_reuse; + FullTransactionId nextxid; + uint64 diff; + + /* + * We can skip this if the page was deleted so long ago, that no scan can possibly + * still see it, even in a standby. One measure might be anything older than the + * table's frozen-xid, but we don't have that at hand here. But anything older than + * 2 billion, from the next XID, is surely old enough, because you would hit XID + * wraparound at that point. + */ + nextxid = ReadNextFullTransactionId(); + diff = U64FromFullTransactionId(nextxid) - U64FromFullTransactionId(latestRemovedXid); + if (diff < 0x7fffffff) + return; /* * Note that we don't register the buffer with the record, because this @@ -738,7 +752,7 @@ gistXLogPageReuse(Relation rel, BlockNumber blkno, TransactionId latestRemovedXi /* XLOG stuff */ xlrec_reuse.node = rel->rd_node; xlrec_reuse.block = blkno; - xlrec_reuse.latestRemovedXid = latestRemovedXid; + xlrec_reuse.latestRemovedXid = XidFromFullTransactionId(latestRemovedXid); XLogBeginInsert(); XLogRegisterData((char *) &xlrec_reuse, SizeOfGistxlogPageReuse); diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c index eb308c72d6..ba00315260 100644 --- a/src/backend/access/rmgrdesc/gistdesc.c +++ b/src/backend/access/rmgrdesc/gistdesc.c @@ -47,8 +47,10 @@ out_gistxlogPageSplit(StringInfo buf, gistxlogPageSplit *xlrec) static void out_gistxlogPageDelete(StringInfo buf, gistxlogPageDelete *xlrec) { - appendStringInfo(buf, "deleteXid %u; downlink %u", - xlrec->deleteXid, xlrec->downlinkOffset); + appendStringInfo(buf, "deleteXid %u:%u; downlink %u", + EpochFromFullTransactionId(xlrec->deleteXid), + XidFromFullTransactionId(xlrec->deleteXid), + xlrec->downlinkOffset); } void diff --git a/src/include/access/gist.h b/src/include/access/gist.h index 6902f4115b..8cdc1f0fa9 100644 --- a/src/include/access/gist.h +++ b/src/include/access/gist.h @@ -16,6 +16,7 @@ #ifndef GIST_H #define GIST_H +#include "access/transam.h" #include "access/xlog.h" #include "access/xlogdefs.h" #include "storage/block.h" @@ -159,8 +160,23 @@ typedef struct GISTENTRY #define GistPageSetNSN(page, val) ( PageXLogRecPtrSet(GistPageGetOpaque(page)->nsn, val)) /* For deleted pages we store last xid which could see the page in scan */ -#define GistPageGetDeleteXid(page) ( ((PageHeader) (page))->pd_prune_xid ) -#define GistPageSetDeleteXid(page, val) ( ((PageHeader) (page))->pd_prune_xid = val) +static inline FullTransactionId +GistPageGetDeleteXid(Page page) +{ + Assert(GistPageIsDeleted(page)); + Assert(((PageHeader) page)->pd_lower == MAXALIGN(SizeOfPageHeaderData) + sizeof(FullTransactionId)); + + return *(FullTransactionId *) PageGetContents(page); +} + +static inline void +GistPageSetDeleteXid(Page page, FullTransactionId deletexid) +{ + Assert(PageIsEmpty(page)); + ((PageHeader) page)->pd_lower = MAXALIGN(SizeOfPageHeaderData) + sizeof(FullTransactionId); + + *((FullTransactionId *) PageGetContents(page)) = deletexid; +} /* * Vector of GISTENTRY structs; user-defined methods union and picksplit diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index 78e2e3fb31..2c54f208ca 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -419,11 +419,11 @@ extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup, /* gistxlog.c */ extern XLogRecPtr gistXLogPageDelete(Buffer buffer, - TransactionId xid, Buffer parentBuffer, + FullTransactionId xid, Buffer parentBuffer, OffsetNumber downlinkOffset); extern void gistXLogPageReuse(Relation rel, BlockNumber blkno, - TransactionId latestRemovedXid); + FullTransactionId latestRemovedXid); extern XLogRecPtr gistXLogUpdate(Buffer buffer, OffsetNumber *todelete, int ntodelete, diff --git a/src/include/access/gistxlog.h b/src/include/access/gistxlog.h index 9990d97cbd..9967bdcf3f 100644 --- a/src/include/access/gistxlog.h +++ b/src/include/access/gistxlog.h @@ -83,7 +83,7 @@ typedef struct gistxlogPageSplit */ typedef struct gistxlogPageDelete { - TransactionId deleteXid; /* last Xid which could see page in scan */ + FullTransactionId deleteXid; /* last Xid which could see page in scan */ OffsetNumber downlinkOffset; /* Offset of downlink referencing this page */ } gistxlogPageDelete; -- 2.20.1