From 03a50acf74d9f9e7876b44b75ce94b106f6e4147 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Sun, 7 Apr 2019 18:00:10 -0700 Subject: [PATCH 2/2] WIP-copy-freeze-should-actually-freeze-right --- src/backend/access/heap/heapam.c | 124 +++++++++++++++++++++--- src/backend/access/heap/visibilitymap.c | 2 +- src/include/access/heapam_xlog.h | 2 + 3 files changed, 116 insertions(+), 12 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index a05b6a07ad0..d4040911956 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2115,6 +2115,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, int ndone; PGAlignedBlock scratch; Page page; + Buffer vmbuffer = InvalidBuffer; bool needwal; Size saveFreeSpace; bool need_tuple_data = RelationIsLogicallyLogged(relation); @@ -2169,9 +2170,11 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, while (ndone < ntuples) { Buffer buffer; - Buffer vmbuffer = InvalidBuffer; + bool starting_with_empty_page; bool all_visible_cleared = false; + bool all_frozen_set = false; int nthispage; + XLogRecPtr recptr; CHECK_FOR_INTERRUPTS(); @@ -2184,6 +2187,11 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, &vmbuffer, NULL); page = BufferGetPage(buffer); + starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0; + + if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN)) + all_frozen_set = true; + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -2209,7 +2217,14 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, log_heap_new_cid(relation, heaptup); } - if (PageIsAllVisible(page)) + /* + * If the page is all visible, need to clear that, unless we're only + * going to add further frozen rows to it. + * + * If we're only adding already frozen rows, and the page was + * previously empty, mark it as all-visible. 
+ */ + if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN)) { all_visible_cleared = true; PageClearAllVisible(page); @@ -2217,6 +2232,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, BufferGetBlockNumber(buffer), vmbuffer, VISIBILITYMAP_VALID_BITS); } + else if (all_frozen_set) + PageSetAllVisible(page); /* * XXX Should we set PageSetPrunable on this page ? See heap_insert() @@ -2227,7 +2244,6 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, /* XLOG stuff */ if (needwal) { - XLogRecPtr recptr; xl_heap_multi_insert *xlrec; uint8 info = XLOG_HEAP2_MULTI_INSERT; char *tupledata; @@ -2240,8 +2256,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, * If the page was previously empty, we can reinit the page * instead of restoring the whole thing. */ - init = (ItemPointerGetOffsetNumber(&(heaptuples[ndone]->t_self)) == FirstOffsetNumber && - PageGetMaxOffsetNumber(page) == FirstOffsetNumber + nthispage - 1); + init = starting_with_empty_page; /* allocate xl_heap_multi_insert struct from the scratch area */ xlrec = (xl_heap_multi_insert *) scratchptr; @@ -2259,7 +2274,17 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, /* the rest of the scratch space is used for tuple data */ tupledata = scratchptr; - xlrec->flags = all_visible_cleared ? 
XLH_INSERT_ALL_VISIBLE_CLEARED : 0; + xlrec->flags = 0; + Assert((all_visible_cleared == 0 && all_frozen_set == 0) || + all_visible_cleared != all_frozen_set); + if (all_visible_cleared) + xlrec->flags |= XLH_INSERT_ALL_VISIBLE_CLEARED; + if (all_frozen_set) + { + xlrec->flags |= XLH_INSERT_ALL_VISIBLE_SET; + xlrec->flags |= XLH_INSERT_ALL_FROZEN_SET; + } + xlrec->ntuples = nthispage; /* @@ -2333,13 +2358,46 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, END_CRIT_SECTION(); - UnlockReleaseBuffer(buffer); - if (vmbuffer != InvalidBuffer) - ReleaseBuffer(vmbuffer); + if (all_frozen_set) + { + Assert(PageIsAllVisible(page)); + /* + * Having to potentially read the page while holding an exclusive + * lock on the page isn't great. But we only get here if + * HEAP_INSERT_FROZEN is set, and we only do so if the table isn't + * readable outside of this session. Therefore doing IO here isn't + * that bad. + */ + visibilitymap_pin(relation, BufferGetBlockNumber(buffer), &vmbuffer); + + /* + * FIXME: setting recptr here is a dirty dirty hack, to prevent + * visibilitymap_set() from WAL logging. + * + * It's fine to use InvalidTransactionId here - this is only used + * when HEAP_INSERT_FROZEN is specified, which intentionally + * violates visibility rules. + */ + visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer, + recptr, vmbuffer, + InvalidTransactionId, + VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN); + } + + UnlockReleaseBuffer(buffer); ndone += nthispage; + + /* + * NB: Only release vmbuffer after inserting all tuples - it's fairly + * likely that we'll insert into subsequent heap pages that are likely + * to use the same vm page. + */ } + if (vmbuffer != InvalidBuffer) + ReleaseBuffer(vmbuffer); + /* * We're done with the actual inserts. Check for conflicts again, to * ensure that all rw-conflicts in to these inserts are detected. 
Without @@ -8176,6 +8234,7 @@ heap_xlog_multi_insert(XLogReaderState *record) BlockNumber blkno; Buffer buffer; Page page; + Page vmpage; union { HeapTupleHeaderData hdr; @@ -8200,13 +8259,54 @@ heap_xlog_multi_insert(XLogReaderState *record) * The visibility map may need to be fixed even if the heap page is * already up-to-date. */ - if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) + if (xlrec->flags & (XLH_INSERT_ALL_VISIBLE_CLEARED | + XLH_INSERT_ALL_VISIBLE_SET | + XLH_INSERT_ALL_FROZEN_SET)) { Relation reln = CreateFakeRelcacheEntry(rnode); Buffer vmbuffer = InvalidBuffer; visibilitymap_pin(reln, blkno, &vmbuffer); - visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS); + + if (xlrec->flags & (XLH_INSERT_ALL_VISIBLE_CLEARED)) + { + Assert(!(xlrec->flags & (XLH_INSERT_ALL_FROZEN_SET | + XLH_INSERT_ALL_VISIBLE_SET))); + visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS); + } + else + { + int vmbits = 0; + + if (xlrec->flags & (XLH_INSERT_ALL_VISIBLE_SET)) + vmbits |= VISIBILITYMAP_ALL_VISIBLE; + if (xlrec->flags & (XLH_INSERT_ALL_FROZEN_SET)) + vmbits |= VISIBILITYMAP_ALL_FROZEN; + + vmpage = BufferGetPage(vmbuffer); + + /* + * Don't set the bit if replay has already passed this point. + * + * It might be safe to do this unconditionally; if replay has passed + * this point, we'll replay at least as far this time as we did + * before, and if this bit needs to be cleared, the record responsible + * for doing so should be again replayed, and clear it. For right + * now, out of an abundance of conservatism, we use the same test here + * we did for the heap page. If this results in a dropped bit, no + * real harm is done; and the next VACUUM will fix it. + * + * XXX: This seems entirely unnecessary? + * + * FIXME: Theoretically we should only do this after we've + * modified the heap - but it's safe to do it here I think, + * because this means that the page previously was empty. 
+ */ + if (lsn > PageGetLSN(vmpage)) + visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer, + InvalidTransactionId, vmbits); + } + ReleaseBuffer(vmbuffer); FreeFakeRelcacheEntry(reln); } @@ -8284,6 +8384,8 @@ heap_xlog_multi_insert(XLogReaderState *record) if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); + if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_SET) + PageSetAllVisible(page); MarkBufferDirty(buffer); } diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index 64dfe06b261..8a9f4f4c42e 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -253,7 +253,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk); #endif - Assert(InRecovery || XLogRecPtrIsInvalid(recptr)); + //Assert(InRecovery || XLogRecPtrIsInvalid(recptr)); Assert(InRecovery || BufferIsValid(heapBuf)); Assert(flags & VISIBILITYMAP_VALID_BITS); diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h index 22cd13c47fc..557cc7dadcc 100644 --- a/src/include/access/heapam_xlog.h +++ b/src/include/access/heapam_xlog.h @@ -67,6 +67,8 @@ #define XLH_INSERT_LAST_IN_MULTI (1<<1) #define XLH_INSERT_IS_SPECULATIVE (1<<2) #define XLH_INSERT_CONTAINS_NEW_TUPLE (1<<3) +#define XLH_INSERT_ALL_VISIBLE_SET (1<<4) +#define XLH_INSERT_ALL_FROZEN_SET (1<<5) /* * xl_heap_update flag values, 8 bits are available. -- 2.21.0.dirty