From 0f3443d6544b2b871486429f961e9e6e740a8f20 Mon Sep 17 00:00:00 2001 From: Maxim Orlov Date: Fri, 11 Mar 2022 11:36:34 +0300 Subject: [PATCH v54] Index SLRUs by 64-bit integers rather than by 32-bit integers We've had repeated bugs in the area of handling SLRU wraparound in the past, some of which have caused data loss. Switching to an indexing system for SLRUs that does not wrap around should allow us to get rid of a whole bunch of problems and improve the overall reliability of the system. That being said, this particular patch only changes the indexing and doesn't address the wraparound per se. The patch also changes the SLRU segment naming. E.g., instead of pg_xact/1234 we now have pg_xact/000000001234. The upgrade procedure is straightforward. Maxim Orlov, Aleksander Alekseev. With previous input from Alexander Korotkov, Teodor Sigaev, Nikita Glukhov, Pavel Borisov, Yura Sokolov. Reviewed-by: Aleksander Alekseev, Jacob Champion Discussion: https://postgr.es/m/CACG%3DezZe1NQSCnfHOr78AtAZxJZeCvxrts0ygrxYwe%3DpyyjVWA%40mail.gmail.com Discussion: https://postgr.es/m/CAJ7c6TPDOYBYrnCAeyndkBktO0WG2xSdYduTF0nxq%2BvfkmTF5Q%40mail.gmail.com --- src/backend/access/rmgrdesc/clogdesc.c | 10 +- src/backend/access/rmgrdesc/committsdesc.c | 10 +- src/backend/access/rmgrdesc/mxactdesc.c | 6 +- src/backend/access/transam/clog.c | 61 ++-- src/backend/access/transam/commit_ts.c | 48 +-- src/backend/access/transam/multixact.c | 56 +-- src/backend/access/transam/slru.c | 94 ++--- src/backend/access/transam/subtrans.c | 30 +- src/backend/commands/async.c | 18 +- src/backend/storage/lmgr/predicate.c | 10 +- src/bin/pg_upgrade/pg_upgrade.c | 320 +++++++++++++++++- src/bin/pg_upgrade/pg_upgrade.h | 5 + src/bin/pg_verifybackup/t/003_corruption.pl | 2 +- src/include/access/clog.h | 2 +- src/include/access/commit_ts.h | 2 +- src/include/access/slru.h | 24 +- src/include/storage/proc.h | 2 +- src/include/storage/sync.h | 2 +- .../modules/test_slru/expected/test_slru.out | 2 +-
src/test/modules/test_slru/test_slru--1.0.sql | 14 +- src/test/modules/test_slru/test_slru.c | 25 +- 21 files changed, 546 insertions(+), 197 deletions(-) diff --git a/src/backend/access/rmgrdesc/clogdesc.c b/src/backend/access/rmgrdesc/clogdesc.c index e60b76f9da..6b367622ca 100644 --- a/src/backend/access/rmgrdesc/clogdesc.c +++ b/src/backend/access/rmgrdesc/clogdesc.c @@ -25,18 +25,18 @@ clog_desc(StringInfo buf, XLogReaderState *record) if (info == CLOG_ZEROPAGE) { - int pageno; + int64 pageno; - memcpy(&pageno, rec, sizeof(int)); - appendStringInfo(buf, "page %d", pageno); + memcpy(&pageno, rec, sizeof(pageno)); + appendStringInfo(buf, "page %lld", (long long) pageno); } else if (info == CLOG_TRUNCATE) { xl_clog_truncate xlrec; memcpy(&xlrec, rec, sizeof(xl_clog_truncate)); - appendStringInfo(buf, "page %d; oldestXact %u", - xlrec.pageno, xlrec.oldestXact); + appendStringInfo(buf, "page %lld; oldestXact %u", + (long long) xlrec.pageno, xlrec.oldestXact); } } diff --git a/src/backend/access/rmgrdesc/committsdesc.c b/src/backend/access/rmgrdesc/committsdesc.c index e7155cd507..6a1a6413f1 100644 --- a/src/backend/access/rmgrdesc/committsdesc.c +++ b/src/backend/access/rmgrdesc/committsdesc.c @@ -26,17 +26,17 @@ commit_ts_desc(StringInfo buf, XLogReaderState *record) if (info == COMMIT_TS_ZEROPAGE) { - int pageno; + int64 pageno; - memcpy(&pageno, rec, sizeof(int)); - appendStringInfo(buf, "%d", pageno); + memcpy(&pageno, rec, sizeof(pageno)); + appendStringInfo(buf, "%lld", (long long) pageno); } else if (info == COMMIT_TS_TRUNCATE) { xl_commit_ts_truncate *trunc = (xl_commit_ts_truncate *) rec; - appendStringInfo(buf, "pageno %d, oldestXid %u", - trunc->pageno, trunc->oldestXid); + appendStringInfo(buf, "pageno %lld, oldestXid %u", + (long long) trunc->pageno, trunc->oldestXid); } } diff --git a/src/backend/access/rmgrdesc/mxactdesc.c b/src/backend/access/rmgrdesc/mxactdesc.c index a2fa1eca18..e423a3da5e 100644 --- a/src/backend/access/rmgrdesc/mxactdesc.c +++ 
b/src/backend/access/rmgrdesc/mxactdesc.c @@ -55,10 +55,10 @@ multixact_desc(StringInfo buf, XLogReaderState *record) if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE || info == XLOG_MULTIXACT_ZERO_MEM_PAGE) { - int pageno; + int64 pageno; - memcpy(&pageno, rec, sizeof(int)); - appendStringInfo(buf, "%d", pageno); + memcpy(&pageno, rec, sizeof(pageno)); + appendStringInfo(buf, "%lld", (long long) pageno); } else if (info == XLOG_MULTIXACT_CREATE_ID) { diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 4a431d5876..4a4f71453e 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -62,7 +62,16 @@ #define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE) #define CLOG_XACT_BITMASK ((1 << CLOG_BITS_PER_XACT) - 1) -#define TransactionIdToPage(xid) ((xid) / (TransactionId) CLOG_XACTS_PER_PAGE) + +/* + * Although we return an int64, the actual value can't currently exceed 2**32. + */ +static inline int64 +TransactionIdToPage(TransactionId xid) +{ + return xid / (int64) CLOG_XACTS_PER_PAGE; +} + #define TransactionIdToPgIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_PAGE) #define TransactionIdToByte(xid) (TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_BYTE) #define TransactionIdToBIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_BYTE) @@ -89,24 +98,24 @@ static SlruCtlData XactCtlData; #define XactCtl (&XactCtlData) -static int ZeroCLOGPage(int pageno, bool writeXlog); -static bool CLOGPagePrecedes(int page1, int page2); -static void WriteZeroPageXlogRec(int pageno); -static void WriteTruncateXlogRec(int pageno, TransactionId oldestXact, +static int ZeroCLOGPage(int64 pageno, bool writeXlog); +static bool CLOGPagePrecedes(int64 page1, int64 page2); +static void WriteZeroPageXlogRec(int64 pageno); +static void WriteTruncateXlogRec(int64 pageno, TransactionId oldestXact, Oid oldestXactDb); static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, -
XLogRecPtr lsn, int pageno, + XLogRecPtr lsn, int64 pageno, bool all_xact_same_page); static void TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, int slotno); static void set_status_by_pages(int nsubxids, TransactionId *subxids, XidStatus status, XLogRecPtr lsn); static bool TransactionGroupUpdateXidStatus(TransactionId xid, - XidStatus status, XLogRecPtr lsn, int pageno); + XidStatus status, XLogRecPtr lsn, int64 pageno); static void TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, - XLogRecPtr lsn, int pageno); + XLogRecPtr lsn, int64 pageno); /* @@ -162,7 +171,7 @@ void TransactionIdSetTreeStatus(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, XLogRecPtr lsn) { - int pageno = TransactionIdToPage(xid); /* get page of parent */ + int64 pageno = TransactionIdToPage(xid); /* get page of parent */ int i; Assert(status == TRANSACTION_STATUS_COMMITTED || @@ -236,7 +245,7 @@ static void set_status_by_pages(int nsubxids, TransactionId *subxids, XidStatus status, XLogRecPtr lsn) { - int pageno = TransactionIdToPage(subxids[0]); + int64 pageno = TransactionIdToPage(subxids[0]); int offset = 0; int i = 0; @@ -245,7 +254,7 @@ set_status_by_pages(int nsubxids, TransactionId *subxids, while (i < nsubxids) { int num_on_page = 0; - int nextpageno; + int64 nextpageno; do { @@ -271,7 +280,7 @@ set_status_by_pages(int nsubxids, TransactionId *subxids, static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, - XLogRecPtr lsn, int pageno, + XLogRecPtr lsn, int64 pageno, bool all_xact_same_page) { /* Can't use group update when PGPROC overflows. 
*/ @@ -337,7 +346,7 @@ TransactionIdSetPageStatus(TransactionId xid, int nsubxids, static void TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, - XLogRecPtr lsn, int pageno) + XLogRecPtr lsn, int64 pageno) { int slotno; int i; @@ -411,7 +420,7 @@ TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids, */ static bool TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status, - XLogRecPtr lsn, int pageno) + XLogRecPtr lsn, int64 pageno) { volatile PROC_HDR *procglobal = ProcGlobal; PGPROC *proc = MyProc; @@ -637,7 +646,7 @@ TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, i XidStatus TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn) { - int pageno = TransactionIdToPage(xid); + int64 pageno = TransactionIdToPage(xid); int byteno = TransactionIdToByte(xid); int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT; int slotno; @@ -734,7 +743,7 @@ BootStrapCLOG(void) * Control lock must be held at entry, and will be held at exit. */ static int -ZeroCLOGPage(int pageno, bool writeXlog) +ZeroCLOGPage(int64 pageno, bool writeXlog) { int slotno; @@ -754,7 +763,7 @@ void StartupCLOG(void) { TransactionId xid = XidFromFullTransactionId(ShmemVariableCache->nextXid); - int pageno = TransactionIdToPage(xid); + int64 pageno = TransactionIdToPage(xid); LWLockAcquire(XactSLRULock, LW_EXCLUSIVE); @@ -773,7 +782,7 @@ void TrimCLOG(void) { TransactionId xid = XidFromFullTransactionId(ShmemVariableCache->nextXid); - int pageno = TransactionIdToPage(xid); + int64 pageno = TransactionIdToPage(xid); LWLockAcquire(XactSLRULock, LW_EXCLUSIVE); @@ -838,7 +847,7 @@ CheckPointCLOG(void) void ExtendCLOG(TransactionId newestXact) { - int pageno; + int64 pageno; /* * No work except at first XID of a page. 
But beware: just after @@ -877,7 +886,7 @@ ExtendCLOG(TransactionId newestXact) void TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid) { - int cutoffPage; + int64 cutoffPage; /* * The cutoff point is the start of the segment containing oldestXact. We @@ -930,7 +939,7 @@ TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid) * don't optimize that edge case. */ static bool -CLOGPagePrecedes(int page1, int page2) +CLOGPagePrecedes(int64 page1, int64 page2) { TransactionId xid1; TransactionId xid2; @@ -949,10 +958,10 @@ CLOGPagePrecedes(int page1, int page2) * Write a ZEROPAGE xlog record */ static void -WriteZeroPageXlogRec(int pageno) +WriteZeroPageXlogRec(int64 pageno) { XLogBeginInsert(); - XLogRegisterData((char *) (&pageno), sizeof(int)); + XLogRegisterData((char *) (&pageno), sizeof(pageno)); (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE); } @@ -963,7 +972,7 @@ WriteZeroPageXlogRec(int pageno) * in TruncateCLOG(). */ static void -WriteTruncateXlogRec(int pageno, TransactionId oldestXact, Oid oldestXactDb) +WriteTruncateXlogRec(int64 pageno, TransactionId oldestXact, Oid oldestXactDb) { XLogRecPtr recptr; xl_clog_truncate xlrec; @@ -991,10 +1000,10 @@ clog_redo(XLogReaderState *record) if (info == CLOG_ZEROPAGE) { - int pageno; + int64 pageno; int slotno; - memcpy(&pageno, XLogRecGetData(record), sizeof(int)); + memcpy(&pageno, XLogRecGetData(record), sizeof(pageno)); LWLockAcquire(XactSLRULock, LW_EXCLUSIVE); diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c index b897fabc70..d7a7e88348 100644 --- a/src/backend/access/transam/commit_ts.c +++ b/src/backend/access/transam/commit_ts.c @@ -65,8 +65,16 @@ typedef struct CommitTimestampEntry #define COMMIT_TS_XACTS_PER_PAGE \ (BLCKSZ / SizeOfCommitTimestampEntry) -#define TransactionIdToCTsPage(xid) \ - ((xid) / (TransactionId) COMMIT_TS_XACTS_PER_PAGE) + +/* + * Although we return an int64, the actual value can't currently exceed 2**32.
+ */ +static inline int64 +TransactionIdToCTsPage(TransactionId xid) +{ + return xid / (int64) COMMIT_TS_XACTS_PER_PAGE; +} + #define TransactionIdToCTsEntry(xid) \ ((xid) % (TransactionId) COMMIT_TS_XACTS_PER_PAGE) @@ -103,16 +111,16 @@ bool track_commit_timestamp; static void SetXidCommitTsInPage(TransactionId xid, int nsubxids, TransactionId *subxids, TimestampTz ts, - RepOriginId nodeid, int pageno); + RepOriginId nodeid, int64 pageno); static void TransactionIdSetCommitTs(TransactionId xid, TimestampTz ts, RepOriginId nodeid, int slotno); static void error_commit_ts_disabled(void); -static int ZeroCommitTsPage(int pageno, bool writeXlog); -static bool CommitTsPagePrecedes(int page1, int page2); +static int ZeroCommitTsPage(int64 pageno, bool writeXlog); +static bool CommitTsPagePrecedes(int64 page1, int64 page2); static void ActivateCommitTs(void); static void DeactivateCommitTs(void); -static void WriteZeroPageXlogRec(int pageno); -static void WriteTruncateXlogRec(int pageno, TransactionId oldestXid); +static void WriteZeroPageXlogRec(int64 pageno); +static void WriteTruncateXlogRec(int64 pageno, TransactionId oldestXid); /* * TransactionTreeSetCommitTsData @@ -170,7 +178,7 @@ TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids, i = 0; for (;;) { - int pageno = TransactionIdToCTsPage(headxid); + int64 pageno = TransactionIdToCTsPage(headxid); int j; for (j = i; j < nsubxids; j++) @@ -214,7 +222,7 @@ TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids, static void SetXidCommitTsInPage(TransactionId xid, int nsubxids, TransactionId *subxids, TimestampTz ts, - RepOriginId nodeid, int pageno) + RepOriginId nodeid, int64 pageno) { int slotno; int i; @@ -266,7 +274,7 @@ bool TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts, RepOriginId *nodeid) { - int pageno = TransactionIdToCTsPage(xid); + int64 pageno = TransactionIdToCTsPage(xid); int entryno = TransactionIdToCTsEntry(xid); int slotno; CommitTimestampEntry entry; @@ -569,7 
+577,7 @@ BootStrapCommitTs(void) * Control lock must be held at entry, and will be held at exit. */ static int -ZeroCommitTsPage(int pageno, bool writeXlog) +ZeroCommitTsPage(int64 pageno, bool writeXlog) { int slotno; @@ -662,7 +670,7 @@ static void ActivateCommitTs(void) { TransactionId xid; - int pageno; + int64 pageno; /* If we've done this already, there's nothing to do */ LWLockAcquire(CommitTsLock, LW_EXCLUSIVE); @@ -795,7 +803,7 @@ CheckPointCommitTs(void) void ExtendCommitTs(TransactionId newestXact) { - int pageno; + int64 pageno; /* * Nothing to do if module not enabled. Note we do an unlocked read of @@ -833,7 +841,7 @@ ExtendCommitTs(TransactionId newestXact) void TruncateCommitTs(TransactionId oldestXact) { - int cutoffPage; + int64 cutoffPage; /* * The cutoff point is the start of the segment containing oldestXact. We @@ -918,7 +926,7 @@ AdvanceOldestCommitTsXid(TransactionId oldestXact) * oldestXact=N+2.1, it would be precious at oldestXact=N+2.9. */ static bool -CommitTsPagePrecedes(int page1, int page2) +CommitTsPagePrecedes(int64 page1, int64 page2) { TransactionId xid1; TransactionId xid2; @@ -937,10 +945,10 @@ CommitTsPagePrecedes(int page1, int page2) * Write a ZEROPAGE xlog record */ static void -WriteZeroPageXlogRec(int pageno) +WriteZeroPageXlogRec(int64 pageno) { XLogBeginInsert(); - XLogRegisterData((char *) (&pageno), sizeof(int)); + XLogRegisterData((char *) (&pageno), sizeof(pageno)); (void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_ZEROPAGE); } @@ -948,7 +956,7 @@ WriteZeroPageXlogRec(int pageno) * Write a TRUNCATE xlog record */ static void -WriteTruncateXlogRec(int pageno, TransactionId oldestXid) +WriteTruncateXlogRec(int64 pageno, TransactionId oldestXid) { xl_commit_ts_truncate xlrec; @@ -973,10 +981,10 @@ commit_ts_redo(XLogReaderState *record) if (info == COMMIT_TS_ZEROPAGE) { - int pageno; + int64 pageno; int slotno; - memcpy(&pageno, XLogRecGetData(record), sizeof(int)); + memcpy(&pageno, XLogRecGetData(record), sizeof(pageno)); 
LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE); diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index fe6698d5ff..a0e28cf443 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -354,10 +354,10 @@ static void mXactCachePut(MultiXactId multi, int nmembers, static char *mxstatus_to_string(MultiXactStatus status); /* management of SLRU infrastructure */ -static int ZeroMultiXactOffsetPage(int pageno, bool writeXlog); -static int ZeroMultiXactMemberPage(int pageno, bool writeXlog); -static bool MultiXactOffsetPagePrecedes(int page1, int page2); -static bool MultiXactMemberPagePrecedes(int page1, int page2); +static int ZeroMultiXactOffsetPage(int64 pageno, bool writeXlog); +static int ZeroMultiXactMemberPage(int64 pageno, bool writeXlog); +static bool MultiXactOffsetPagePrecedes(int64 page1, int64 page2); +static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2); static bool MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2); static void ExtendMultiXactOffset(MultiXactId multi); @@ -366,7 +366,7 @@ static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary, MultiXactOffset start, uint32 distance); static bool SetOffsetVacuumLimit(bool is_startup); static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result); -static void WriteMZeroPageXlogRec(int pageno, uint8 info); +static void WriteMZeroPageXlogRec(int64 pageno, uint8 info); static void WriteMTruncateXlogRec(Oid oldestMultiDB, MultiXactId startTruncOff, MultiXactId endTruncOff, @@ -864,8 +864,8 @@ static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, int nmembers, MultiXactMember *members) { - int pageno; - int prev_pageno; + int64 pageno; + int64 prev_pageno; int entryno; int slotno; MultiXactOffset *offptr; @@ -1225,8 +1225,8 @@ int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly) { - int pageno; - 
int prev_pageno; + int64 pageno; + int64 prev_pageno; int entryno; int slotno; MultiXactOffset *offptr; @@ -1928,7 +1928,7 @@ BootStrapMultiXact(void) * Control lock must be held at entry, and will be held at exit. */ static int -ZeroMultiXactOffsetPage(int pageno, bool writeXlog) +ZeroMultiXactOffsetPage(int64 pageno, bool writeXlog) { int slotno; @@ -1944,7 +1944,7 @@ ZeroMultiXactOffsetPage(int pageno, bool writeXlog) * Ditto, for MultiXactMember */ static int -ZeroMultiXactMemberPage(int pageno, bool writeXlog) +ZeroMultiXactMemberPage(int64 pageno, bool writeXlog) { int slotno; @@ -1974,7 +1974,7 @@ ZeroMultiXactMemberPage(int pageno, bool writeXlog) static void MaybeExtendOffsetSlru(void) { - int pageno; + int64 pageno; pageno = MultiXactIdToOffsetPage(MultiXactState->nextMXact); @@ -2009,7 +2009,7 @@ StartupMultiXact(void) { MultiXactId multi = MultiXactState->nextMXact; MultiXactOffset offset = MultiXactState->nextOffset; - int pageno; + int64 pageno; /* * Initialize offset's idea of the latest page number. @@ -2034,7 +2034,7 @@ TrimMultiXact(void) MultiXactOffset offset; MultiXactId oldestMXact; Oid oldestMXactDB; - int pageno; + int64 pageno; int entryno; int flagsoff; @@ -2403,7 +2403,7 @@ MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB) static void ExtendMultiXactOffset(MultiXactId multi) { - int pageno; + int64 pageno; /* * No work except at first MultiXactId of a page. 
But beware: just after @@ -2452,7 +2452,7 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers) flagsbit = MXOffsetToFlagsBitShift(offset); if (flagsoff == 0 && flagsbit == 0) { - int pageno; + int64 pageno; pageno = MXOffsetToMemberPage(offset); @@ -2735,7 +2735,7 @@ static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result) { MultiXactOffset offset; - int pageno; + int64 pageno; int entryno; int slotno; MultiXactOffset *offptr; @@ -2854,7 +2854,7 @@ MultiXactMemberFreezeThreshold(void) typedef struct mxtruncinfo { - int earliestExistingPage; + int64 earliestExistingPage; } mxtruncinfo; /* @@ -2862,7 +2862,7 @@ typedef struct mxtruncinfo * This callback determines the earliest existing page number. */ static bool -SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int segpage, void *data) +SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int64 segpage, void *data) { mxtruncinfo *trunc = (mxtruncinfo *) data; @@ -3113,7 +3113,7 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB) * translational symmetry. */ static bool -MultiXactOffsetPagePrecedes(int page1, int page2) +MultiXactOffsetPagePrecedes(int64 page1, int64 page2) { MultiXactId multi1; MultiXactId multi2; @@ -3133,7 +3133,7 @@ MultiXactOffsetPagePrecedes(int page1, int page2) * purposes. There is no "invalid offset number" so use the numbers verbatim. 
*/ static bool -MultiXactMemberPagePrecedes(int page1, int page2) +MultiXactMemberPagePrecedes(int64 page1, int64 page2) { MultiXactOffset offset1; MultiXactOffset offset2; @@ -3191,10 +3191,10 @@ MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2) * OFFSETs page (info shows which) */ static void -WriteMZeroPageXlogRec(int pageno, uint8 info) +WriteMZeroPageXlogRec(int64 pageno, uint8 info) { XLogBeginInsert(); - XLogRegisterData((char *) (&pageno), sizeof(int)); + XLogRegisterData((char *) (&pageno), sizeof(pageno)); (void) XLogInsert(RM_MULTIXACT_ID, info); } @@ -3239,10 +3239,10 @@ multixact_redo(XLogReaderState *record) if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE) { - int pageno; + int64 pageno; int slotno; - memcpy(&pageno, XLogRecGetData(record), sizeof(int)); + memcpy(&pageno, XLogRecGetData(record), sizeof(pageno)); LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE); @@ -3254,10 +3254,10 @@ multixact_redo(XLogReaderState *record) } else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE) { - int pageno; + int64 pageno; int slotno; - memcpy(&pageno, XLogRecGetData(record), sizeof(int)); + memcpy(&pageno, XLogRecGetData(record), sizeof(pageno)); LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE); @@ -3299,7 +3299,7 @@ multixact_redo(XLogReaderState *record) else if (info == XLOG_MULTIXACT_TRUNCATE_ID) { xl_multixact_truncate xlrec; - int pageno; + int64 pageno; memcpy(&xlrec, XLogRecGetData(record), SizeOfMultiXactTruncate); diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index 5ab86238a9..ba63602cdc 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -60,8 +60,11 @@ #include "storage/fd.h" #include "storage/shmem.h" -#define SlruFileName(ctl, path, seg) \ - snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg) +static int inline +SlruFileName(SlruCtl ctl, char *path, int64 segno) +{ + return snprintf(path, MAXPGPATH, "%s/%012llX", ctl->Dir, (long long) segno); +} /* * 
During SimpleLruWriteAll(), we will usually not need to write more than one @@ -75,7 +78,7 @@ typedef struct SlruWriteAllData { int num_files; /* # files actually open */ int fd[MAX_WRITEALL_BUFFERS]; /* their FD's */ - int segno[MAX_WRITEALL_BUFFERS]; /* their log seg#s */ + int64 segno[MAX_WRITEALL_BUFFERS]; /* their log seg#s */ } SlruWriteAllData; typedef struct SlruWriteAllData *SlruWriteAll; @@ -138,15 +141,16 @@ static int slru_errno; static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno); static void SimpleLruWaitIO(SlruCtl ctl, int slotno); static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata); -static bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno); -static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, +static bool SlruPhysicalReadPage(SlruCtl ctl, int64 pageno, int slotno); +static bool SlruPhysicalWritePage(SlruCtl ctl, int64 pageno, int slotno, SlruWriteAll fdata); -static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid); -static int SlruSelectLRUPage(SlruCtl ctl, int pageno); +static void SlruReportIOError(SlruCtl ctl, int64 pageno, TransactionId xid); +static int SlruSelectLRUPage(SlruCtl ctl, int64 pageno); static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, - int segpage, void *data); -static void SlruInternalDeleteSegment(SlruCtl ctl, int segno); + int64 segpage, void *data); +static void SlruInternalDeleteSegment(SlruCtl ctl, int64 segno); + /* * Initialization of shared memory @@ -162,7 +166,7 @@ SimpleLruShmemSize(int nslots, int nlsns) sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */ sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */ sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */ - sz += MAXALIGN(nslots * sizeof(int)); /* page_number[] */ + sz += MAXALIGN(nslots * sizeof(int64)); /* page_number[] */ sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */ sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* 
buffer_locks[] */ @@ -226,8 +230,8 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, offset += MAXALIGN(nslots * sizeof(SlruPageStatus)); shared->page_dirty = (bool *) (ptr + offset); offset += MAXALIGN(nslots * sizeof(bool)); - shared->page_number = (int *) (ptr + offset); - offset += MAXALIGN(nslots * sizeof(int)); + shared->page_number = (int64 *) (ptr + offset); + offset += MAXALIGN(nslots * sizeof(int64)); shared->page_lru_count = (int *) (ptr + offset); offset += MAXALIGN(nslots * sizeof(int)); @@ -278,7 +282,7 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, * Control lock must be held at entry, and will be held at exit. */ int -SimpleLruZeroPage(SlruCtl ctl, int pageno) +SimpleLruZeroPage(SlruCtl ctl, int64 pageno) { SlruShared shared = ctl->shared; int slotno; @@ -393,7 +397,7 @@ SimpleLruWaitIO(SlruCtl ctl, int slotno) * Control lock must be held at entry, and will be held at exit. */ int -SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, +SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok, TransactionId xid) { SlruShared shared = ctl->shared; @@ -493,7 +497,7 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, * It is unspecified whether the lock will be shared or exclusive. */ int -SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid) +SimpleLruReadPage_ReadOnly(SlruCtl ctl, int64 pageno, TransactionId xid) { SlruShared shared = ctl->shared; int slotno; @@ -540,7 +544,7 @@ static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata) { SlruShared shared = ctl->shared; - int pageno = shared->page_number[slotno]; + int64 pageno = shared->page_number[slotno]; bool ok; /* If a write is in progress, wait for it to finish */ @@ -624,9 +628,9 @@ SimpleLruWritePage(SlruCtl ctl, int slotno) * large enough to contain the given page. 
*/ bool -SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno) +SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int64 pageno) { - int segno = pageno / SLRU_PAGES_PER_SEGMENT; + int64 segno = pageno / SLRU_PAGES_PER_SEGMENT; int rpageno = pageno % SLRU_PAGES_PER_SEGMENT; int offset = rpageno * BLCKSZ; char path[MAXPGPATH]; @@ -682,10 +686,10 @@ SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno) * read/write operations. We could cache one virtual file pointer ... */ static bool -SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno) +SlruPhysicalReadPage(SlruCtl ctl, int64 pageno, int slotno) { SlruShared shared = ctl->shared; - int segno = pageno / SLRU_PAGES_PER_SEGMENT; + int64 segno = pageno / SLRU_PAGES_PER_SEGMENT; int rpageno = pageno % SLRU_PAGES_PER_SEGMENT; off_t offset = rpageno * BLCKSZ; char path[MAXPGPATH]; @@ -754,10 +758,10 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno) * SimpleLruWriteAll. */ static bool -SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata) +SlruPhysicalWritePage(SlruCtl ctl, int64 pageno, int slotno, SlruWriteAll fdata) { SlruShared shared = ctl->shared; - int segno = pageno / SLRU_PAGES_PER_SEGMENT; + int64 segno = pageno / SLRU_PAGES_PER_SEGMENT; int rpageno = pageno % SLRU_PAGES_PER_SEGMENT; off_t offset = rpageno * BLCKSZ; char path[MAXPGPATH]; @@ -929,9 +933,9 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata) * SlruPhysicalWritePage. Call this after cleaning up shared-memory state. 
*/ static void -SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid) +SlruReportIOError(SlruCtl ctl, int64 pageno, TransactionId xid) { - int segno = pageno / SLRU_PAGES_PER_SEGMENT; + int64 segno = pageno / SLRU_PAGES_PER_SEGMENT; int rpageno = pageno % SLRU_PAGES_PER_SEGMENT; int offset = rpageno * BLCKSZ; char path[MAXPGPATH]; @@ -1014,7 +1018,7 @@ SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid) * Control lock must be held at entry, and will be held at exit. */ static int -SlruSelectLRUPage(SlruCtl ctl, int pageno) +SlruSelectLRUPage(SlruCtl ctl, int64 pageno) { SlruShared shared = ctl->shared; @@ -1025,10 +1029,10 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno) int cur_count; int bestvalidslot = 0; /* keep compiler quiet */ int best_valid_delta = -1; - int best_valid_page_number = 0; /* keep compiler quiet */ + int64 best_valid_page_number = 0; /* keep compiler quiet */ int bestinvalidslot = 0; /* keep compiler quiet */ int best_invalid_delta = -1; - int best_invalid_page_number = 0; /* keep compiler quiet */ + int64 best_invalid_page_number = 0; /* keep compiler quiet */ /* See if page already has a buffer assigned */ for (slotno = 0; slotno < shared->num_slots; slotno++) @@ -1069,7 +1073,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno) for (slotno = 0; slotno < shared->num_slots; slotno++) { int this_delta; - int this_page_number; + int64 this_page_number; if (shared->page_status[slotno] == SLRU_PAGE_EMPTY) return slotno; @@ -1159,7 +1163,7 @@ SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied) SlruShared shared = ctl->shared; SlruWriteAllData fdata; int slotno; - int pageno = 0; + int64 pageno = 0; int i; bool ok; @@ -1224,7 +1228,7 @@ SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied) * after it has accrued freshly-written data. 
*/ void -SimpleLruTruncate(SlruCtl ctl, int cutoffPage) +SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage) { SlruShared shared = ctl->shared; int slotno; @@ -1302,7 +1306,7 @@ restart: * they either can't yet contain anything, or have already been cleaned out. */ static void -SlruInternalDeleteSegment(SlruCtl ctl, int segno) +SlruInternalDeleteSegment(SlruCtl ctl, int64 segno) { char path[MAXPGPATH]; @@ -1325,7 +1329,7 @@ SlruInternalDeleteSegment(SlruCtl ctl, int segno) * Delete an individual SLRU segment, identified by the segment number. */ void -SlruDeleteSegment(SlruCtl ctl, int segno) +SlruDeleteSegment(SlruCtl ctl, int64 segno) { SlruShared shared = ctl->shared; int slotno; @@ -1389,9 +1393,9 @@ restart: * first>=cutoff && last>=cutoff: no; every page of this segment is too young */ static bool -SlruMayDeleteSegment(SlruCtl ctl, int segpage, int cutoffPage) +SlruMayDeleteSegment(SlruCtl ctl, int64 segpage, int64 cutoffPage) { - int seg_last_page = segpage + SLRU_PAGES_PER_SEGMENT - 1; + int64 seg_last_page = segpage + SLRU_PAGES_PER_SEGMENT - 1; Assert(segpage % SLRU_PAGES_PER_SEGMENT == 0); @@ -1405,7 +1409,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset) { TransactionId lhs, rhs; - int newestPage, + int64 newestPage, oldestPage; TransactionId newestXact, oldestXact; @@ -1498,9 +1502,10 @@ SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page) * one containing the page passed as "data". */ bool -SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data) +SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int64 segpage, + void *data) { - int cutoffPage = *(int *) data; + int64 cutoffPage = *(int64 *) data; if (SlruMayDeleteSegment(ctl, segpage, cutoffPage)) return true; /* found one; don't iterate any more */ @@ -1513,9 +1518,10 @@ SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data * This callback deletes segments prior to the one passed in as "data". 
*/ static bool -SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data) +SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int64 segpage, + void *data) { - int cutoffPage = *(int *) data; + int64 cutoffPage = *(int64 *) data; if (SlruMayDeleteSegment(ctl, segpage, cutoffPage)) SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT); @@ -1528,7 +1534,7 @@ SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data) * This callback deletes all segments. */ bool -SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data) +SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int64 segpage, void *data) { SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT); @@ -1556,8 +1562,8 @@ SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data) bool retval = false; DIR *cldir; struct dirent *clde; - int segno; - int segpage; + int64 segno; + int64 segpage; cldir = AllocateDir(ctl->Dir); while ((clde = ReadDir(cldir, ctl->Dir)) != NULL) @@ -1566,10 +1572,10 @@ SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data) len = strlen(clde->d_name); - if ((len == 4 || len == 5 || len == 6) && + if ((len == 12 || len == 13 || len == 14) && strspn(clde->d_name, "0123456789ABCDEF") == len) { - segno = (int) strtol(clde->d_name, NULL, 16); + segno = (int64) strtol(clde->d_name, NULL, 16); segpage = segno * SLRU_PAGES_PER_SEGMENT; elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s", diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c index 62bb610167..8937a31d46 100644 --- a/src/backend/access/transam/subtrans.c +++ b/src/backend/access/transam/subtrans.c @@ -51,7 +51,15 @@ /* We need four bytes per xact */ #define SUBTRANS_XACTS_PER_PAGE (BLCKSZ / sizeof(TransactionId)) -#define TransactionIdToPage(xid) ((xid) / (TransactionId) SUBTRANS_XACTS_PER_PAGE) +/* + * Although we return an int64, the actual value can't currently exceed 2**32.
+ */ +static inline int64 +TransactionIdToPage(TransactionId xid) +{ + return xid / (int64) SUBTRANS_XACTS_PER_PAGE; +} + #define TransactionIdToEntry(xid) ((xid) % (TransactionId) SUBTRANS_XACTS_PER_PAGE) @@ -63,8 +71,8 @@ static SlruCtlData SubTransCtlData; #define SubTransCtl (&SubTransCtlData) -static int ZeroSUBTRANSPage(int pageno); -static bool SubTransPagePrecedes(int page1, int page2); +static int ZeroSUBTRANSPage(int64 pageno); +static bool SubTransPagePrecedes(int64 page1, int64 page2); /* @@ -73,7 +81,7 @@ static bool SubTransPagePrecedes(int page1, int page2); void SubTransSetParent(TransactionId xid, TransactionId parent) { - int pageno = TransactionIdToPage(xid); + int64 pageno = TransactionIdToPage(xid); int entryno = TransactionIdToEntry(xid); int slotno; TransactionId *ptr; @@ -108,7 +116,7 @@ SubTransSetParent(TransactionId xid, TransactionId parent) TransactionId SubTransGetParent(TransactionId xid) { - int pageno = TransactionIdToPage(xid); + int64 pageno = TransactionIdToPage(xid); int entryno = TransactionIdToEntry(xid); int slotno; TransactionId *ptr; @@ -233,7 +241,7 @@ BootStrapSUBTRANS(void) * Control lock must be held at entry, and will be held at exit. */ static int -ZeroSUBTRANSPage(int pageno) +ZeroSUBTRANSPage(int64 pageno) { return SimpleLruZeroPage(SubTransCtl, pageno); } @@ -249,8 +257,8 @@ void StartupSUBTRANS(TransactionId oldestActiveXID) { FullTransactionId nextXid; - int startPage; - int endPage; + int64 startPage; + int64 endPage; /* * Since we don't expect pg_subtrans to be valid across crashes, we @@ -307,7 +315,7 @@ CheckPointSUBTRANS(void) void ExtendSUBTRANS(TransactionId newestXact) { - int pageno; + int64 pageno; /* * No work except at first XID of a page. But beware: just after @@ -337,7 +345,7 @@ ExtendSUBTRANS(TransactionId newestXact) void TruncateSUBTRANS(TransactionId oldestXact) { - int cutoffPage; + int64 cutoffPage; /* * The cutoff point is the start of the segment containing oldestXact. 
We @@ -359,7 +367,7 @@ TruncateSUBTRANS(TransactionId oldestXact) * Analogous to CLOGPagePrecedes(). */ static bool -SubTransPagePrecedes(int page1, int page2) +SubTransPagePrecedes(int64 page1, int64 page2) { TransactionId xid1; TransactionId xid2; diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c index ef909cf4e0..3c8b3f7b16 100644 --- a/src/backend/commands/async.c +++ b/src/backend/commands/async.c @@ -196,7 +196,7 @@ typedef struct AsyncQueueEntry */ typedef struct QueuePosition { - int page; /* SLRU page number */ + int64 page; /* SLRU page number */ int offset; /* byte offset within page */ } QueuePosition; @@ -443,8 +443,8 @@ static bool tryAdvanceTail = false; bool Trace_notify = false; /* local function prototypes */ -static int asyncQueuePageDiff(int p, int q); -static bool asyncQueuePagePrecedes(int p, int q); +static int64 asyncQueuePageDiff(int64 p, int64 q); +static bool asyncQueuePagePrecedes(int64 p, int64 q); static void queue_listen(ListenActionKind action, const char *channel); static void Async_UnlistenOnExit(int code, Datum arg); static void Exec_ListenPreCommit(void); @@ -477,10 +477,10 @@ static void ClearPendingActionsAndNotifies(void); * Compute the difference between two queue page numbers (i.e., p - q), * accounting for wraparound. */ -static int -asyncQueuePageDiff(int p, int q) +static int64 +asyncQueuePageDiff(int64 p, int64 q) { - int diff; + int64 diff; /* * We have to compare modulo (QUEUE_MAX_PAGE+1)/2. Both inputs should be @@ -504,7 +504,7 @@ asyncQueuePageDiff(int p, int q) * extant page, we need not assess entries within a page. 
*/ static bool -asyncQueuePagePrecedes(int p, int q) +asyncQueuePagePrecedes(int64 p, int64 q) { return asyncQueuePageDiff(p, q) < 0; } @@ -1336,7 +1336,7 @@ asyncQueueIsFull(void) static bool asyncQueueAdvance(volatile QueuePosition *position, int entryLength) { - int pageno = QUEUE_POS_PAGE(*position); + int64 pageno = QUEUE_POS_PAGE(*position); int offset = QUEUE_POS_OFFSET(*position); bool pageJump = false; @@ -1409,7 +1409,7 @@ asyncQueueAddEntries(ListCell *nextNotify) { AsyncQueueEntry qe; QueuePosition queue_head; - int pageno; + int64 pageno; int offset; int slotno; diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index bfc352aed8..cf01c81159 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -437,7 +437,7 @@ static void SetPossibleUnsafeConflict(SERIALIZABLEXACT *roXact, SERIALIZABLEXACT static void ReleaseRWConflict(RWConflict conflict); static void FlagSxactUnsafe(SERIALIZABLEXACT *sxact); -static bool SerialPagePrecedesLogically(int page1, int page2); +static bool SerialPagePrecedesLogically(int64 page1, int64 page2); static void SerialInit(void); static void SerialAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo); static SerCommitSeqNo SerialGetMinConflictCommitSeqNo(TransactionId xid); @@ -724,7 +724,7 @@ FlagSxactUnsafe(SERIALIZABLEXACT *sxact) * Analogous to CLOGPagePrecedes(). 
*/ static bool -SerialPagePrecedesLogically(int page1, int page2) +SerialPagePrecedesLogically(int64 page1, int64 page2) { TransactionId xid1; TransactionId xid2; @@ -744,7 +744,7 @@ SerialPagePrecedesLogicallyUnitTests(void) { int per_page = SERIAL_ENTRIESPERPAGE, offset = per_page / 2; - int newestPage, + int64 newestPage, oldestPage, headPage, targetPage; @@ -842,9 +842,9 @@ static void SerialAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo) { TransactionId tailXid; - int targetPage; + int64 targetPage; int slotno; - int firstZeroPage; + int64 firstZeroPage; bool isNewPage; Assert(TransactionIdIsValid(xid)); diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index e5597d3105..1a4bd74ae5 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -44,6 +44,7 @@ #include <langinfo.h> #endif +#include "access/transam.h" #include "catalog/pg_class_d.h" #include "common/file_perm.h" #include "common/logging.h" @@ -566,6 +567,306 @@ copy_subdir_files(const char *old_subdir, const char *new_subdir) check_ok(); } +// AALEKSEEV TODO FIXME: refactor vvv +/******************************************************************************/ +#define SLRU_PAGES_PER_SEGMENT 32 /* Should be equal to value from slru.h */ + +#define CLOG_BITS_PER_XACT 2 +#define CLOG_XACTS_PER_BYTE 4 +#define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE) +/* + * Read/write position within an SLRU directory: open segment file, segno, pageno + */ +typedef struct SLRUSegmentState +{ + const char *dir; + FILE *file; + int64 segno; + int64 pageno; + bool is_empty_segment; +} SLRUSegmentState; + +/* + * Old cluster SlruFileName (i.e. 32-bit) + */ +static char * +slru_filename_old(const char *path, int64 segno) +{ + Assert(segno <= PG_INT32_MAX); + return psprintf("%s/%04X", path, (int) segno); +} + +/* + * New cluster SlruFileName (i.e.
64-bit) + */ +static char * +slru_filename_new(const char *path, int64 segno) +{ + return psprintf("%s/%012llX", path, (long long) segno); +} + +/* + * Generalized fopen for SLRU segment file + */ +static inline FILE * +open_file(SLRUSegmentState *state, + char * (filename_fn)(const char *path, int64 segno), + const char *mode, const char *fatal_msg) +{ + char *filename = filename_fn(state->dir, state->segno); + FILE *fd = fopen(filename, mode); + + if (!fd) + pg_fatal(fatal_msg, filename); + + pfree(filename); + + return fd; +} + +/* + * Generalized fclose for SLRU segment file + */ +static void +close_file(SLRUSegmentState *state, + char * (filename_fn)(const char *path, int64 segno)) +{ + if (state->file != NULL) + { + if (fclose(state->file) != 0) + pg_fatal("could not close file \"%s\": %m", + filename_fn(state->dir, state->segno)); + state->file = NULL; + } +} + +/* + * Generalized fread of BLCKSZ from SLRU segment file + */ +static inline int +read_file(SLRUSegmentState *state, void *buf) +{ + size_t n = fread(buf, sizeof(char), BLCKSZ, state->file); + + if (n != 0) + return n; + + if (ferror(state->file)) + pg_fatal("could not read file \"%s\": %m", + slru_filename_old(state->dir, state->segno)); + + if (!feof(state->file)) + pg_fatal("unknown file read state \"%s\": %m", + slru_filename_old(state->dir, state->segno)); + + close_file(state, slru_filename_old); + + return 0; +} + +static int +read_old_segment_page(SLRUSegmentState *state, void *buf, bool *is_empty) +{ + int n; + + /* Open next segment file, if needed */ + if (!state->file) + { + state->file = open_file(state, slru_filename_old, "rb", + "could not open source file \"%s\": %m"); + + /* Set position to the needed page */ + if (fseek(state->file, state->pageno * BLCKSZ, SEEK_SET)) + close_file(state, slru_filename_old); + + /* + * Skip segment conversion if segment file doesn't exist. + * First segment file should exist in any case.
+ */ + if (state->segno != 0) + state->is_empty_segment = true; + } + + if (state->file) + { + /* Segment file does exist, read page from it */ + state->is_empty_segment = false; + + /* Try to read BLCKSZ bytes */ + n = read_file(state, buf); + *is_empty = (n == 0); + + /* Zeroing buf tail if needed */ + if (n) + memset((char *) buf + n, 0, BLCKSZ - n); + } + else + { + n = state->is_empty_segment ? + BLCKSZ : /* Skip empty block at the end of segment */ + 0; /* We reached the last segment */ + *is_empty = true; + + if (n) + memset((char *) buf, 0, BLCKSZ); + } + + state->pageno++; + + if (state->pageno >= SLRU_PAGES_PER_SEGMENT) + { + /* Start new segment */ + state->segno++; + state->pageno = 0; + close_file(state, slru_filename_old); + } + + return n; +} + +static void +write_new_segment_page(SLRUSegmentState *state, void *buf, bool is_empty) +{ + /* + * Create a new segment file if we still didn't. Creation is postponed + * until the first non-empty page is found. This helps not to create + * completely empty segments. + */ + if (!state->file && !is_empty) + { + state->file = open_file(state, slru_filename_new, "wb", + "could not open target file \"%s\": %m"); + + /* Write zeroes to the previously skipped prefix */ + if (state->pageno > 0) + { + char zerobuf[BLCKSZ] = {0}; + + for (int64 i = 0; i < state->pageno; i++) + { + if (fwrite(zerobuf, sizeof(char), BLCKSZ, state->file) != BLCKSZ) + pg_fatal("could not write file \"%s\": %m", + slru_filename_new(state->dir, state->segno)); + } + } + + } + + /* Write page to the new segment (if it was created) */ + if (state->file) + { + if (fwrite(buf, sizeof(char), BLCKSZ, state->file) != BLCKSZ) + pg_fatal("could not write file \"%s\": %m", + slru_filename_new(state->dir, state->segno)); + } + + state->pageno++; + + /* + * Did we reach the maximum page number? 
Then close segment file and + * create a new one on the next iteration + */ + if (state->pageno >= SLRU_PAGES_PER_SEGMENT) + { + state->segno++; + state->pageno = 0; + close_file(state, slru_filename_new); + } +} + +/* + * Convert pg_xact from 32bit to 64bit format. + * + * We read SLRU pages of pg_xact segments from old cluster one by one and write + * them in a new segments files. + * + * AALEKSEEV TODO FIXME rewrite this + */ +static void +convert_pg_xact_segments(const char *old_subdir, const char *new_subdir) +{ + SLRUSegmentState oldseg = {0}; + SLRUSegmentState newseg = {0}; + char buf[BLCKSZ] = {0}; + FullTransactionId oldestxid; + FullTransactionId nxtxid; + uint32 epoch; + int64 pageno; + uint64 xid; + + oldseg.dir = old_subdir; + newseg.dir = new_subdir; + + prep_status("AALEKSEEV DEBUG convert_pg_xact_segments(), old_subdir = %s, new_subdir = %s", + old_subdir, new_subdir); + + /* wraparound without epoch is not possible */ + if (old_cluster.controldata.chkpnt_nxtepoch == 0 && + old_cluster.controldata.chkpnt_oldstxid > old_cluster.controldata.chkpnt_nxtxid) + { + pg_fatal("inconsistent pg_xact of directory \"%s\"", + old_cluster.pgdata); + } + + /* get full transactions bounds from old cluster */ + epoch = old_cluster.controldata.chkpnt_nxtepoch; + nxtxid = FullTransactionIdFromEpochAndXid(epoch, + old_cluster.controldata.chkpnt_nxtxid); + if (old_cluster.controldata.chkpnt_oldstxid > XidFromFullTransactionId(nxtxid)) + --epoch; + + oldestxid = FullTransactionIdFromEpochAndXid(epoch, + old_cluster.controldata.chkpnt_oldstxid); + + /* get init segments and pages */ + pageno = oldestxid.value / CLOG_XACTS_PER_PAGE; + + oldseg.segno = pageno / SLRU_PAGES_PER_SEGMENT; + oldseg.pageno = pageno % SLRU_PAGES_PER_SEGMENT; + + newseg.segno = pageno / SLRU_PAGES_PER_SEGMENT; + newseg.pageno = pageno % SLRU_PAGES_PER_SEGMENT; + + /* Copy xid flags reading only needed segment pages */ + for (xid = oldestxid.value & ~(CLOG_XACTS_PER_PAGE - 1); + xid <= 
((nxtxid.value - 1) & ~(CLOG_XACTS_PER_PAGE - 1)); + xid += CLOG_XACTS_PER_PAGE) + { + bool is_empty; + int len; + + /* Handle possible segment wraparound */ + if (oldseg.segno > MaxTransactionId / CLOG_XACTS_PER_PAGE / SLRU_PAGES_PER_SEGMENT) + { + Assert(!oldseg.pageno); + Assert(!oldseg.file); + Assert(!newseg.pageno); + Assert(!newseg.file); + + oldseg.segno = 0; + } + + len = read_old_segment_page(&oldseg, buf, &is_empty); + + /* + * Ignore read errors, copy all existing segment pages in the + * interesting xid range. + */ + is_empty |= len <= 0; + + Assert(len >= 0); + Assert(is_empty == false); + + write_new_segment_page(&newseg, buf, is_empty); + } + + /* Release resources */ + close_file(&oldseg, slru_filename_old); + close_file(&newseg, slru_filename_new); +} + +// ^^^ AALEKSEEV TODO FIXME - until here + static void copy_xact_xlog_xid(void) { @@ -573,10 +874,21 @@ copy_xact_xlog_xid(void) * Copy old commit logs to new data dir. pg_clog has been renamed to * pg_xact in post-10 clusters. */ - copy_subdir_files(GET_MAJOR_VERSION(old_cluster.major_version) <= 906 ? - "pg_clog" : "pg_xact", - GET_MAJOR_VERSION(new_cluster.major_version) <= 906 ? - "pg_clog" : "pg_xact"); +#define GetClogDirName(cluster) \ + (GET_MAJOR_VERSION((cluster).major_version) <= 906 ? 
"pg_clog" : "pg_xact") + + if (old_cluster.controldata.cat_ver < SLRU_FORMAT_CHANGE_CAT_VER) + { + char *old_path = psprintf("%s/%s", old_cluster.pgdata, GetClogDirName(old_cluster)); + char *new_path = psprintf("%s/%s", new_cluster.pgdata, GetClogDirName(new_cluster)); + + convert_pg_xact_segments(old_path, new_path); + pfree(old_path); + pfree(new_path); + } + else + copy_subdir_files(GetClogDirName(old_cluster), + GetClogDirName(new_cluster)); prep_status("Setting oldest XID for new cluster"); exec_prog(UTILITY_LOG_FILE, NULL, true, true, diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 5f2a116f23..ca9c7978f8 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -126,6 +126,11 @@ extern char *output_files[]; */ #define JSONB_FORMAT_CHANGE_CAT_VER 201409291 +/* + * change in SLRU indexing to 64-bit integers + */ +#define SLRU_FORMAT_CHANGE_CAT_VER 202212092 + /* * Each relation is represented by a relinfo structure. diff --git a/src/bin/pg_verifybackup/t/003_corruption.pl b/src/bin/pg_verifybackup/t/003_corruption.pl index 0c304105c5..69119d30b7 100644 --- a/src/bin/pg_verifybackup/t/003_corruption.pl +++ b/src/bin/pg_verifybackup/t/003_corruption.pl @@ -174,7 +174,7 @@ sub mutilate_extra_tablespace_file sub mutilate_missing_file { my ($backup_path) = @_; - my $pathname = "$backup_path/pg_xact/0000"; + my $pathname = "$backup_path/pg_xact/000000000000"; unlink($pathname) || die "$pathname: $!"; return; } diff --git a/src/include/access/clog.h b/src/include/access/clog.h index d99444f073..8fd99ba670 100644 --- a/src/include/access/clog.h +++ b/src/include/access/clog.h @@ -31,7 +31,7 @@ typedef int XidStatus; typedef struct xl_clog_truncate { - int pageno; + int64 pageno; TransactionId oldestXact; Oid oldestXactDb; } xl_clog_truncate; diff --git a/src/include/access/commit_ts.h b/src/include/access/commit_ts.h index 5087cdce51..11fa3a922d 100644 --- a/src/include/access/commit_ts.h +++ 
b/src/include/access/commit_ts.h @@ -60,7 +60,7 @@ typedef struct xl_commit_ts_set typedef struct xl_commit_ts_truncate { - int pageno; + int64 pageno; TransactionId oldestXid; } xl_commit_ts_truncate; diff --git a/src/include/access/slru.h b/src/include/access/slru.h index a8a424d92d..859ec6e589 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -64,7 +64,7 @@ typedef struct SlruSharedData char **page_buffer; SlruPageStatus *page_status; bool *page_dirty; - int *page_number; + int64 *page_number; int *page_lru_count; LWLockPadded *buffer_locks; @@ -95,7 +95,7 @@ typedef struct SlruSharedData * this is not critical data, since we use it only to avoid swapping out * the latest page. */ - int latest_page_number; + int64 latest_page_number; /* SLRU's index for statistics purposes (might not be unique) */ int slru_stats_idx; @@ -127,7 +127,7 @@ typedef struct SlruCtlData * the behavior of this callback has no functional implications.) Use * SlruPagePrecedesUnitTests() in SLRUs meeting its criteria. */ - bool (*PagePrecedes) (int, int); + bool (*PagePrecedes) (int64, int64); /* * Dir is set during SimpleLruInit and does not change thereafter. 
Since @@ -143,10 +143,10 @@ extern Size SimpleLruShmemSize(int nslots, int nlsns); extern void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, LWLock *ctllock, const char *subdir, int tranche_id, SyncRequestHandler sync_handler); -extern int SimpleLruZeroPage(SlruCtl ctl, int pageno); -extern int SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, +extern int SimpleLruZeroPage(SlruCtl ctl, int64 pageno); +extern int SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok, TransactionId xid); -extern int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, +extern int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int64 pageno, TransactionId xid); extern void SimpleLruWritePage(SlruCtl ctl, int slotno); extern void SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied); @@ -155,20 +155,20 @@ extern void SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page); #else #define SlruPagePrecedesUnitTests(ctl, per_page) do {} while (0) #endif -extern void SimpleLruTruncate(SlruCtl ctl, int cutoffPage); -extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno); +extern void SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage); +extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int64 pageno); -typedef bool (*SlruScanCallback) (SlruCtl ctl, char *filename, int segpage, +typedef bool (*SlruScanCallback) (SlruCtl ctl, char *filename, int64 segpage, void *data); extern bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data); -extern void SlruDeleteSegment(SlruCtl ctl, int segno); +extern void SlruDeleteSegment(SlruCtl ctl, int64 segno); extern int SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path); /* SlruScanDirectory public callbacks */ extern bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, - int segpage, void *data); -extern bool SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, + int64 segpage, void *data); +extern bool SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int64 segpage, 
void *data); #endif /* SLRU_H */ diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 4258cd92c9..a2cc0dc96f 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -281,7 +281,7 @@ struct PGPROC TransactionId clogGroupMemberXid; /* transaction id of clog group member */ XidStatus clogGroupMemberXidStatus; /* transaction status of clog * group member */ - int clogGroupMemberPage; /* clog page corresponding to + int64 clogGroupMemberPage; /* clog page corresponding to * transaction id of clog group member */ XLogRecPtr clogGroupMemberLsn; /* WAL location of commit record for clog * group member */ diff --git a/src/include/storage/sync.h b/src/include/storage/sync.h index cfbcfa6797..ef2469ef91 100644 --- a/src/include/storage/sync.h +++ b/src/include/storage/sync.h @@ -52,7 +52,7 @@ typedef struct FileTag int16 handler; /* SyncRequestHandler value, saving space */ int16 forknum; /* ForkNumber, saving space */ RelFileLocator rlocator; - uint32 segno; + uint64 segno; } FileTag; extern void InitSync(void); diff --git a/src/test/modules/test_slru/expected/test_slru.out b/src/test/modules/test_slru/expected/test_slru.out index 0e66fdc205..8019ff916d 100644 --- a/src/test/modules/test_slru/expected/test_slru.out +++ b/src/test/modules/test_slru/expected/test_slru.out @@ -61,7 +61,7 @@ SELECT test_slru_page_writeall(); -- Flush the last page written out. 
SELECT test_slru_page_sync(12393); -NOTICE: Called SlruSyncFileTag() for segment 387 on path pg_test_slru/0183 +NOTICE: Called SlruSyncFileTag() for segment 387 on path pg_test_slru/000000000183 test_slru_page_sync --------------------- diff --git a/src/test/modules/test_slru/test_slru--1.0.sql b/src/test/modules/test_slru/test_slru--1.0.sql index 8635e7df01..202e8da3fd 100644 --- a/src/test/modules/test_slru/test_slru--1.0.sql +++ b/src/test/modules/test_slru/test_slru--1.0.sql @@ -1,21 +1,21 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION test_slru" to load this file. \quit -CREATE OR REPLACE FUNCTION test_slru_page_write(int, text) RETURNS VOID +CREATE OR REPLACE FUNCTION test_slru_page_write(bigint, text) RETURNS VOID AS 'MODULE_PATHNAME', 'test_slru_page_write' LANGUAGE C; CREATE OR REPLACE FUNCTION test_slru_page_writeall() RETURNS VOID AS 'MODULE_PATHNAME', 'test_slru_page_writeall' LANGUAGE C; -CREATE OR REPLACE FUNCTION test_slru_page_sync(int) RETURNS VOID +CREATE OR REPLACE FUNCTION test_slru_page_sync(bigint) RETURNS VOID AS 'MODULE_PATHNAME', 'test_slru_page_sync' LANGUAGE C; -CREATE OR REPLACE FUNCTION test_slru_page_read(int, bool DEFAULT true) RETURNS text +CREATE OR REPLACE FUNCTION test_slru_page_read(bigint, bool DEFAULT true) RETURNS text AS 'MODULE_PATHNAME', 'test_slru_page_read' LANGUAGE C; -CREATE OR REPLACE FUNCTION test_slru_page_readonly(int) RETURNS text +CREATE OR REPLACE FUNCTION test_slru_page_readonly(bigint) RETURNS text AS 'MODULE_PATHNAME', 'test_slru_page_readonly' LANGUAGE C; -CREATE OR REPLACE FUNCTION test_slru_page_exists(int) RETURNS bool +CREATE OR REPLACE FUNCTION test_slru_page_exists(bigint) RETURNS bool AS 'MODULE_PATHNAME', 'test_slru_page_exists' LANGUAGE C; -CREATE OR REPLACE FUNCTION test_slru_page_delete(int) RETURNS VOID +CREATE OR REPLACE FUNCTION test_slru_page_delete(bigint) RETURNS VOID AS 'MODULE_PATHNAME', 'test_slru_page_delete' LANGUAGE C; -CREATE 
OR REPLACE FUNCTION test_slru_page_truncate(int) RETURNS VOID +CREATE OR REPLACE FUNCTION test_slru_page_truncate(bigint) RETURNS VOID AS 'MODULE_PATHNAME', 'test_slru_page_truncate' LANGUAGE C; CREATE OR REPLACE FUNCTION test_slru_delete_all() RETURNS VOID AS 'MODULE_PATHNAME', 'test_slru_delete_all' LANGUAGE C; diff --git a/src/test/modules/test_slru/test_slru.c b/src/test/modules/test_slru/test_slru.c index 622f43da04..577428668a 100644 --- a/src/test/modules/test_slru/test_slru.c +++ b/src/test/modules/test_slru/test_slru.c @@ -54,7 +54,7 @@ static shmem_startup_hook_type prev_shmem_startup_hook = NULL; const char test_tranche_name[] = "test_slru_tranche"; static bool -test_slru_scan_cb(SlruCtl ctl, char *filename, int segpage, void *data) +test_slru_scan_cb(SlruCtl ctl, char *filename, int64 segpage, void *data) { elog(NOTICE, "Calling test_slru_scan_cb()"); return SlruScanDirCbDeleteAll(ctl, filename, segpage, data); @@ -63,7 +63,7 @@ test_slru_scan_cb(SlruCtl ctl, char *filename, int segpage, void *data) Datum test_slru_page_write(PG_FUNCTION_ARGS) { - int pageno = PG_GETARG_INT32(0); + int64 pageno = PG_GETARG_INT64(0); char *data = text_to_cstring(PG_GETARG_TEXT_PP(1)); int slotno; @@ -98,7 +98,7 @@ test_slru_page_writeall(PG_FUNCTION_ARGS) Datum test_slru_page_read(PG_FUNCTION_ARGS) { - int pageno = PG_GETARG_INT32(0); + int64 pageno = PG_GETARG_INT64(0); bool write_ok = PG_GETARG_BOOL(1); char *data = NULL; int slotno; @@ -116,7 +116,7 @@ test_slru_page_read(PG_FUNCTION_ARGS) Datum test_slru_page_readonly(PG_FUNCTION_ARGS) { - int pageno = PG_GETARG_INT32(0); + int64 pageno = PG_GETARG_INT64(0); char *data = NULL; int slotno; @@ -134,7 +134,7 @@ test_slru_page_readonly(PG_FUNCTION_ARGS) Datum test_slru_page_exists(PG_FUNCTION_ARGS) { - int pageno = PG_GETARG_INT32(0); + int64 pageno = PG_GETARG_INT64(0); bool found; LWLockAcquire(TestSLRULock, LW_EXCLUSIVE); @@ -147,7 +147,7 @@ test_slru_page_exists(PG_FUNCTION_ARGS) Datum 
test_slru_page_sync(PG_FUNCTION_ARGS) { - int pageno = PG_GETARG_INT32(0); + int64 pageno = PG_GETARG_INT64(0); FileTag ftag; char path[MAXPGPATH]; @@ -155,8 +155,8 @@ test_slru_page_sync(PG_FUNCTION_ARGS) ftag.segno = pageno / SLRU_PAGES_PER_SEGMENT; SlruSyncFileTag(TestSlruCtl, &ftag, path); - elog(NOTICE, "Called SlruSyncFileTag() for segment %u on path %s", - ftag.segno, path); + elog(NOTICE, "Called SlruSyncFileTag() for segment %lld on path %s", + (long long) ftag.segno, path); PG_RETURN_VOID(); } @@ -164,13 +164,14 @@ test_slru_page_sync(PG_FUNCTION_ARGS) Datum test_slru_page_delete(PG_FUNCTION_ARGS) { - int pageno = PG_GETARG_INT32(0); + int64 pageno = PG_GETARG_INT64(0); FileTag ftag; ftag.segno = pageno / SLRU_PAGES_PER_SEGMENT; SlruDeleteSegment(TestSlruCtl, ftag.segno); - elog(NOTICE, "Called SlruDeleteSegment() for segment %u", ftag.segno); + elog(NOTICE, "Called SlruDeleteSegment() for segment %lld", + (long long) ftag.segno); PG_RETURN_VOID(); } @@ -178,7 +179,7 @@ test_slru_page_delete(PG_FUNCTION_ARGS) Datum test_slru_page_truncate(PG_FUNCTION_ARGS) { - int pageno = PG_GETARG_INT32(0); + int64 pageno = PG_GETARG_INT64(0); SimpleLruTruncate(TestSlruCtl, pageno); PG_RETURN_VOID(); @@ -208,7 +209,7 @@ test_slru_shmem_request(void) } static bool -test_slru_page_precedes_logically(int page1, int page2) +test_slru_page_precedes_logically(int64 page1, int64 page2) { return page1 < page2; } -- 2.39.1