From 13d4289b171cd2dba479c4efc94e81a1f1f208f9 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Sun, 18 Jan 2026 11:18:11 -0500 Subject: [PATCH v10 01/11] Extract fake LSN infrastructure from GiST index AM. Extract utility functions used by GiST to generate fake LSNs so that other index AMs can reuse this infrastructure. This is preparation for an upcoming commit that will change the rules around holding on to buffer pins on leaf pages in unlogged nbtree indexes (actually, in all cases barring scans that use a non-MVCC snapshot). That upcoming commit is the one that will add the new amgetbatch interface. Another preparatory commit will add fake LSN support to nbtree ahead of the amgetbatch commit. Bump XLOG_PAGE_MAGIC due to XLOG_GIST_ASSIGN_LSN becoming XLOG_ASSIGN_LSN. Author: Peter Geoghegan Discussion: https://postgr.es/m/CAH2-WzkehuhxyuA8quc7rRN3EtNXpiKsjPfO8mhb+0Dr2K0Dtg@mail.gmail.com --- src/include/access/gist_private.h | 4 -- src/include/access/gistxlog.h | 2 +- src/include/access/xlog.h | 1 + src/include/access/xloginsert.h | 2 + src/include/catalog/pg_control.h | 2 +- src/backend/access/gist/gist.c | 6 +-- src/backend/access/gist/gistutil.c | 50 ------------------- src/backend/access/gist/gistvacuum.c | 8 +-- src/backend/access/gist/gistxlog.c | 21 -------- src/backend/access/rmgrdesc/gistdesc.c | 6 --- src/backend/access/rmgrdesc/xlogdesc.c | 7 +++ src/backend/access/transam/xlog.c | 29 +++++++++++ src/backend/access/transam/xloginsert.c | 65 +++++++++++++++++++++++++ src/backend/storage/buffer/bufmgr.c | 14 +++--- 14 files changed, 120 insertions(+), 97 deletions(-) diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index 552f605c0..44514f1cb 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -457,8 +457,6 @@ extern XLogRecPtr gistXLogSplit(bool page_is_leaf, BlockNumber origrlink, GistNSN orignsn, Buffer leftchildbuf, bool markfollowright); -extern XLogRecPtr 
gistXLogAssignLSN(void); - /* gistget.c */ extern bool gistgettuple(IndexScanDesc scan, ScanDirection dir); extern int64 gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm); @@ -531,8 +529,6 @@ extern void gistMakeUnionKey(GISTSTATE *giststate, int attno, GISTENTRY *entry2, bool isnull2, Datum *dst, bool *dstisnull); -extern XLogRecPtr gistGetFakeLSN(Relation rel); - /* gistvacuum.c */ extern IndexBulkDeleteResult *gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, diff --git a/src/include/access/gistxlog.h b/src/include/access/gistxlog.h index d3d1c6549..1c2cf6e81 100644 --- a/src/include/access/gistxlog.h +++ b/src/include/access/gistxlog.h @@ -26,7 +26,7 @@ /* #define XLOG_GIST_INSERT_COMPLETE 0x40 */ /* not used anymore */ /* #define XLOG_GIST_CREATE_INDEX 0x50 */ /* not used anymore */ #define XLOG_GIST_PAGE_DELETE 0x60 -#define XLOG_GIST_ASSIGN_LSN 0x70 /* nop, assign new LSN */ + /* #define XLOG_GIST_ASSIGN_LSN 0x70 */ /* not used anymore */ /* * Backup Blk 0: updated page. 
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index fdfb57246..553d6fc9c 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -258,6 +258,7 @@ extern bool CreateRestartPoint(int flags); extern WALAvailability GetWALAvailability(XLogRecPtr targetLSN); extern void XLogPutNextOid(Oid nextOid); extern XLogRecPtr XLogRestorePoint(const char *rpName); +extern XLogRecPtr XLogAssignLSN(void); extern void UpdateFullPageWrites(void); extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p); extern XLogRecPtr GetRedoRecPtr(void); diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h index 16ebc76e7..91dfbd562 100644 --- a/src/include/access/xloginsert.h +++ b/src/include/access/xloginsert.h @@ -64,6 +64,8 @@ extern void log_newpage_range(Relation rel, ForkNumber forknum, BlockNumber startblk, BlockNumber endblk, bool page_std); extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std); +extern XLogRecPtr XLogGetFakeLSN(Relation rel); + extern void InitXLogInsert(void); #endif /* XLOGINSERT_H */ diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index 7503db1af..77a661e81 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -78,7 +78,7 @@ typedef struct CheckPoint #define XLOG_END_OF_RECOVERY 0x90 #define XLOG_FPI_FOR_HINT 0xA0 #define XLOG_FPI 0xB0 -/* 0xC0 is used in Postgres 9.5-11 */ +#define XLOG_ASSIGN_LSN 0xC0 #define XLOG_OVERWRITE_CONTRECORD 0xD0 #define XLOG_CHECKPOINT_REDO 0xE0 #define XLOG_LOGICAL_DECODING_STATUS_CHANGE 0xF0 diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index dfffce3e3..8565e225b 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -517,7 +517,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, dist, oldrlink, oldnsn, leftchildbuf, markfollowright); else - recptr = gistGetFakeLSN(rel); + recptr = 
XLogGetFakeLSN(rel); } for (ptr = dist; ptr; ptr = ptr->next) @@ -594,7 +594,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, leftchildbuf); } else - recptr = gistGetFakeLSN(rel); + recptr = XLogGetFakeLSN(rel); } PageSetLSN(page, recptr); @@ -1733,7 +1733,7 @@ gistprunepage(Relation rel, Page page, Buffer buffer, Relation heapRel) PageSetLSN(page, recptr); } else - PageSetLSN(page, gistGetFakeLSN(rel)); + PageSetLSN(page, XLogGetFakeLSN(rel)); END_CRIT_SECTION(); } diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index 27972fad2..0f58f6187 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -1007,56 +1007,6 @@ gistproperty(Oid index_oid, int attno, return true; } -/* - * Some indexes are not WAL-logged, but we need LSNs to detect concurrent page - * splits anyway. This function provides a fake sequence of LSNs for that - * purpose. - */ -XLogRecPtr -gistGetFakeLSN(Relation rel) -{ - if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP) - { - /* - * Temporary relations are only accessible in our session, so a simple - * backend-local counter will do. - */ - static XLogRecPtr counter = FirstNormalUnloggedLSN; - - return counter++; - } - else if (RelationIsPermanent(rel)) - { - /* - * WAL-logging on this relation will start after commit, so its LSNs - * must be distinct numbers smaller than the LSN at the next commit. - * Emit a dummy WAL record if insert-LSN hasn't advanced after the - * last call. 
- */ - static XLogRecPtr lastlsn = InvalidXLogRecPtr; - XLogRecPtr currlsn = GetXLogInsertRecPtr(); - - /* Shouldn't be called for WAL-logging relations */ - Assert(!RelationNeedsWAL(rel)); - - /* No need for an actual record if we already have a distinct LSN */ - if (XLogRecPtrIsValid(lastlsn) && lastlsn == currlsn) - currlsn = gistXLogAssignLSN(); - - lastlsn = currlsn; - return currlsn; - } - else - { - /* - * Unlogged relations are accessible from other backends, and survive - * (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us. - */ - Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED); - return GetFakeLSNForUnloggedRel(); - } -} - /* * This is a stratnum translation support function for GiST opclasses that use * the RT*StrategyNumber constants. diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index 9e714980d..686a04180 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c @@ -16,7 +16,7 @@ #include "access/genam.h" #include "access/gist_private.h" -#include "access/transam.h" +#include "access/xloginsert.h" #include "commands/vacuum.h" #include "lib/integerset.h" #include "miscadmin.h" @@ -182,7 +182,7 @@ gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, if (RelationNeedsWAL(rel)) vstate.startNSN = GetInsertRecPtr(); else - vstate.startNSN = gistGetFakeLSN(rel); + vstate.startNSN = XLogGetFakeLSN(rel); /* * The outer loop iterates over all index pages, in physical order (we @@ -413,7 +413,7 @@ restart: PageSetLSN(page, recptr); } else - PageSetLSN(page, gistGetFakeLSN(rel)); + PageSetLSN(page, XLogGetFakeLSN(rel)); END_CRIT_SECTION(); @@ -707,7 +707,7 @@ gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, if (RelationNeedsWAL(info->index)) recptr = gistXLogPageDelete(leafBuffer, txid, parentBuffer, downlink); else - recptr = gistGetFakeLSN(info->index); + recptr = XLogGetFakeLSN(info->index); PageSetLSN(parentPage, 
recptr); PageSetLSN(leafPage, recptr); diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c index c78383849..ae538dc81 100644 --- a/src/backend/access/gist/gistxlog.c +++ b/src/backend/access/gist/gistxlog.c @@ -421,9 +421,6 @@ gist_redo(XLogReaderState *record) case XLOG_GIST_PAGE_DELETE: gistRedoPageDelete(record); break; - case XLOG_GIST_ASSIGN_LSN: - /* nop. See gistGetFakeLSN(). */ - break; default: elog(PANIC, "gist_redo: unknown op code %u", info); } @@ -567,24 +564,6 @@ gistXLogPageDelete(Buffer buffer, FullTransactionId xid, return recptr; } -/* - * Write an empty XLOG record to assign a distinct LSN. - */ -XLogRecPtr -gistXLogAssignLSN(void) -{ - int dummy = 0; - - /* - * Records other than XLOG_SWITCH must have content. We use an integer 0 - * to follow the restriction. - */ - XLogBeginInsert(); - XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT); - XLogRegisterData(&dummy, sizeof(dummy)); - return XLogInsert(RM_GIST_ID, XLOG_GIST_ASSIGN_LSN); -} - /* * Write XLOG record about reuse of a deleted page. 
*/ diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c index 79a839cc2..67789e025 100644 --- a/src/backend/access/rmgrdesc/gistdesc.c +++ b/src/backend/access/rmgrdesc/gistdesc.c @@ -80,9 +80,6 @@ gist_desc(StringInfo buf, XLogReaderState *record) case XLOG_GIST_PAGE_DELETE: out_gistxlogPageDelete(buf, (gistxlogPageDelete *) rec); break; - case XLOG_GIST_ASSIGN_LSN: - /* No details to write out */ - break; } } @@ -108,9 +105,6 @@ gist_identify(uint8 info) case XLOG_GIST_PAGE_DELETE: id = "PAGE_DELETE"; break; - case XLOG_GIST_ASSIGN_LSN: - id = "ASSIGN_LSN"; - break; } return id; diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index ff078f222..9044b9521 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -175,6 +175,10 @@ xlog_desc(StringInfo buf, XLogReaderState *record) memcpy(&enabled, rec, sizeof(bool)); appendStringInfoString(buf, enabled ? "true" : "false"); } + else if (info == XLOG_ASSIGN_LSN) + { + /* no further information to print */ + } } const char * @@ -229,6 +233,9 @@ xlog_identify(uint8 info) case XLOG_LOGICAL_DECODING_STATUS_CHANGE: id = "LOGICAL_DECODING_STATUS_CHANGE"; break; + case XLOG_ASSIGN_LSN: + id = "ASSIGN_LSN"; + break; } return id; diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 13ec6225b..aea12ae83 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -8230,6 +8230,31 @@ XLogRestorePoint(const char *rpName) return RecPtr; } +/* + * Write a dummy XLOG record to assign a distinct LSN. + * + * This is used by index AMs (nbtree, hash, GiST) when building indexes on + * permanent relations with wal_level=minimal. In that scenario, WAL-logging + * will start after commit, but the index AM needs distinct LSNs to detect + * concurrent page modifications. 
When the current WAL insert position hasn't + * advanced since the last call, we emit this dummy record to ensure we get a + * new, distinct LSN. + */ +XLogRecPtr +XLogAssignLSN(void) +{ + int dummy = 0; + + /* + * Records other than XLOG_SWITCH must have content. We use an integer 0 + * to satisfy this restriction. + */ + XLogBeginInsert(); + XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT); + XLogRegisterData(&dummy, sizeof(dummy)); + return XLogInsert(RM_XLOG_ID, XLOG_ASSIGN_LSN); +} + /* * Check if any of the GUC parameters that are critical for hot standby * have changed, and update the value in pg_control file if necessary. @@ -8597,6 +8622,10 @@ xlog_redo(XLogReaderState *record) { /* nothing to do here, handled in xlogrecovery.c */ } + else if (info == XLOG_ASSIGN_LSN) + { + /* nothing to do here, see XLogGetFakeLSN() */ + } else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT) { /* diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c index d3acaa636..b107e44ac 100644 --- a/src/backend/access/transam/xloginsert.c +++ b/src/backend/access/transam/xloginsert.c @@ -547,6 +547,71 @@ XLogSimpleInsertInt64(RmgrId rmid, uint8 info, int64 value) return XLogInsert(rmid, info); } +/* + * XLogGetFakeLSN - get a fake LSN for an index page that isn't WAL-logged. + * + * Some index AMs (nbtree, hash, GiST) use LSNs to detect concurrent page + * modifications, but not all index pages are WAL-logged. This function + * provides a sequence of fake LSNs for that purpose. + * + * The behavior depends on the relation's persistence: + * + * - For temporary relations, we use a simple backend-local counter since + * temporary relations are only accessible within our session. + * + * - For permanent relations when WAL-logging is disabled (e.g., during index + * creation with wal_level=minimal), we use the current WAL insert position. 
+ * If the insert position hasn't advanced since the last call, we emit a + * dummy WAL record via XLogAssignLSN() to ensure we get a distinct LSN. + * + * - For unlogged relations, we use the global fake LSN counter maintained + * by GetFakeLSNForUnloggedRel(). + */ +XLogRecPtr +XLogGetFakeLSN(Relation rel) +{ + if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP) + { + /* + * Temporary relations are only accessible in our session, so a simple + * backend-local counter will do. + */ + static XLogRecPtr counter = FirstNormalUnloggedLSN; + + return counter++; + } + else if (RelationIsPermanent(rel)) + { + /* + * WAL-logging on this relation will start after commit, so its LSNs + * must be distinct numbers smaller than the LSN at the next commit. + * Emit a dummy WAL record if insert-LSN hasn't advanced after the + * last call. + */ + static XLogRecPtr lastlsn = InvalidXLogRecPtr; + XLogRecPtr currlsn = GetXLogInsertRecPtr(); + + /* Shouldn't be called for WAL-logging relations */ + Assert(!RelationNeedsWAL(rel)); + + /* No need for an actual record if we already have a distinct LSN */ + if (XLogRecPtrIsValid(lastlsn) && lastlsn == currlsn) + currlsn = XLogAssignLSN(); + + lastlsn = currlsn; + return currlsn; + } + else + { + /* + * Unlogged relations are accessible from other backends, and survive + * (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us. + */ + Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED); + return GetFakeLSNForUnloggedRel(); + } +} + /* * Assemble a WAL record from the registered data and buffers into an * XLogRecData chain, ready for insertion with XLogInsertRecord(). diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 7241477ca..551a0eeb3 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -4469,13 +4469,13 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, * lost after a crash anyway. 
Most unlogged relation pages do not bear * LSNs since we never emit WAL records for them, and therefore flushing * up through the buffer LSN would be useless, but harmless. However, - * GiST indexes use LSNs internally to track page-splits, and therefore - * unlogged GiST pages bear "fake" LSNs generated by - * GetFakeLSNForUnloggedRel. It is unlikely but possible that the fake - * LSN counter could advance past the WAL insertion point; and if it did - * happen, attempting to flush WAL through that location would fail, with - * disastrous system-wide consequences. To make sure that can't happen, - * skip the flush if the buffer isn't permanent. + * some index AMs (nbtree, hash, GiST) use LSNs internally to detect + * concurrent page modifications, and therefore unlogged index pages bear + * "fake" LSNs generated by XLogGetFakeLSN. It is unlikely but possible + * that the fake LSN counter could advance past the WAL insertion point; + * and if it did happen, attempting to flush WAL through that location + * would fail, with disastrous system-wide consequences. To make sure + * that can't happen, skip the flush if the buffer isn't permanent. */ if (buf_state & BM_PERMANENT) XLogFlush(recptr); -- 2.51.0