Re: [HACKERS] WAL logging problem in 9.4.3? - Mailing list pgsql-hackers
From | Kyotaro Horiguchi |
---|---|
Subject | Re: [HACKERS] WAL logging problem in 9.4.3? |
Date | |
Msg-id | 20191121.194858.792948544668017842.horikyota.ntt@gmail.com Whole thread Raw |
In response to | Re: [HACKERS] WAL logging problem in 9.4.3? (Kyotaro Horiguchi <horikyota.ntt@gmail.com>) |
List | pgsql-hackers |
At Thu, 21 Nov 2019 16:01:07 +0900 (JST), Kyotaro Horiguchi <horikyota.ntt@gmail.com> wrote in > > For deleting relfilenodes, smgrDoPendingDeletes() collects a list for > > smgrdounlinkall() to pass to DropRelFileNodesAllBuffers(), which is > > sophisticated about optimizing the shared buffers scan. Commit 279628a > > introduced that, in 2013. I think smgrDoPendingSyncs() should do likewise, to > Seems reasonable. Please wait a minite. This is the first cut of that. This makes the function FlushRelationBuffersWithoutRelcache useless, which was introducedin this work. The first patch reverts it, then the second patch adds the bulk sync feature. The new function FlushRelFileNodesAllBuffers, differently from DropRelFileNodesAllBuffers, takes SMgrRelation which is required by FlushBuffer(). So it takes somewhat tricky way, where type SMgrSortArray pointer to which is compatible with RelFileNode is used. regards. -- Kyotaro Horiguchi NTT Open Source Software Center From c51b44734d88fb19b568c4c0240848c8be2b7cf4 Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi <horikyota.ntt@gmail.com> Date: Thu, 21 Nov 2019 19:28:35 +0900 Subject: [PATCH 1/2] Revert FlushRelationBuffersWithoutRelcache. Succeeding patch makes the function useless and the function is no longer useful globally. Revert it. --- src/backend/storage/buffer/bufmgr.c | 27 ++++++++++----------------- src/include/storage/bufmgr.h | 2 -- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 746ce477fc..67bbb26cae 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -3203,27 +3203,20 @@ PrintPinnedBufs(void) void FlushRelationBuffers(Relation rel) { - RelationOpenSmgr(rel); - - FlushRelationBuffersWithoutRelcache(rel->rd_smgr, - RelationUsesLocalBuffers(rel)); -} - -void -FlushRelationBuffersWithoutRelcache(SMgrRelation smgr, bool islocal) -{ - RelFileNode rnode = smgr->smgr_rnode.node; - int i; + int i; BufferDesc *bufHdr; - if (islocal) + /* Open rel at the smgr level if not already done */ + RelationOpenSmgr(rel); + + if (RelationUsesLocalBuffers(rel)) { for (i = 0; i < NLocBuffer; i++) { uint32 buf_state; bufHdr = GetLocalBufferDescriptor(i); - if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) && + if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) && ((buf_state = pg_atomic_read_u32(&bufHdr->state)) & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY)) { @@ -3240,7 +3233,7 @@ FlushRelationBuffersWithoutRelcache(SMgrRelation smgr, bool islocal) PageSetChecksumInplace(localpage, bufHdr->tag.blockNum); - smgrwrite(smgr, + smgrwrite(rel->rd_smgr, bufHdr->tag.forkNum, bufHdr->tag.blockNum, localpage, @@ -3270,18 +3263,18 @@ FlushRelationBuffersWithoutRelcache(SMgrRelation smgr, bool islocal) * As in DropRelFileNodeBuffers, an unlocked precheck should be safe * and saves some cycles. */ - if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode)) + if (!RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node)) continue; ReservePrivateRefCountEntry(); buf_state = LockBufHdr(bufHdr); - if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) && + if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) && (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY)) { PinBuffer_Locked(bufHdr); LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED); - FlushBuffer(bufHdr, smgr); + FlushBuffer(bufHdr, rel->rd_smgr); LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); UnpinBuffer(bufHdr, true); } diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 8097d5ab22..8cd1cf25d9 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -192,8 +192,6 @@ extern BlockNumber RelationGetNumberOfBlocksInFork(Relation relation, ForkNumber forkNum); extern void FlushOneBuffer(Buffer buffer); extern void FlushRelationBuffers(Relation rel); -extern void FlushRelationBuffersWithoutRelcache(struct SMgrRelationData *smgr, - bool islocal); extern void FlushDatabaseBuffers(Oid dbid); extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock); -- 2.23.0 From 882731fcf063269d0bf85c57f23c83b9570e5df5 Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi <horikyota.ntt@gmail.com> Date: Thu, 21 Nov 2019 19:33:18 +0900 Subject: [PATCH 2/2] Improve the performance of relation syncs. We can improve performance of syncing multiple files at once in the same way as b41669118. This reduces the number of scans on the whole shared_bufffers from the number of synced relations to one. --- src/backend/catalog/storage.c | 28 +++++-- src/backend/storage/buffer/bufmgr.c | 113 ++++++++++++++++++++++++++++ src/backend/storage/smgr/smgr.c | 38 +++++++++- src/include/storage/bufmgr.h | 1 + src/include/storage/smgr.h | 1 + 5 files changed, 174 insertions(+), 7 deletions(-) diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index 51c233dac6..65811b2a9e 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -533,6 +533,9 @@ smgrDoPendingSyncs(void) { PendingRelDelete *pending; HTAB *delhash = NULL; + int nrels = 0, + maxrels = 0; + SMgrRelation *srels = NULL; if (XLogIsNeeded()) return; /* no relation can use this */ @@ -573,7 +576,7 @@ smgrDoPendingSyncs(void) for (pending = pendingDeletes; pending != NULL; pending = pending->next) { - bool to_be_removed = false; /* don't sync if aborted */ + bool to_be_removed = false; ForkNumber fork; BlockNumber nblocks[MAX_FORKNUM + 1]; BlockNumber total_blocks = 0; @@ -623,14 +626,21 @@ smgrDoPendingSyncs(void) */ if (total_blocks * BLCKSZ >= wal_skip_threshold * 1024) { - /* Flush all buffers then sync the file */ - FlushRelationBuffersWithoutRelcache(srel, false); + /* relations to sync are passed to smgrdosyncall at once */ - for (fork = 0; fork <= MAX_FORKNUM; fork++) + /* allocate the initial array, or extend it, if needed */ + if (maxrels == 0) { - if (smgrexists(srel, fork)) - smgrimmedsync(srel, fork); + maxrels = 8; + srels = palloc(sizeof(SMgrRelation) * maxrels); } + else if (maxrels <= nrels) + { + maxrels *= 2; + srels = repalloc(srels, sizeof(SMgrRelation) * maxrels); + } + + srels[nrels++] = srel; } else { @@ -658,6 +668,12 @@ smgrDoPendingSyncs(void) if (delhash) hash_destroy(delhash); + + if (nrels > 0) + { + smgrdosyncall(srels, nrels); + pfree(srels); + } } /* diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 67bbb26cae..56314653ae 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -105,6 +105,19 @@ typedef struct CkptTsStatus int index; } CkptTsStatus; +/* + * Type for array used to sort SMgrRelations + * + * FlushRelFileNodesAllBuffers shares the same comparator function with + * DropRelFileNodeBuffers. Pointer to this struct and RelFileNode must + * be compatible. + */ +typedef struct SMgrSortArray +{ + RelFileNode rnode; /* This must be the first member */ + SMgrRelation srel; +} SMgrSortArray; + /* GUC variables */ bool zero_damaged_pages = false; int bgwriter_lru_maxpages = 100; @@ -3283,6 +3296,106 @@ FlushRelationBuffers(Relation rel) } } +/* --------------------------------------------------------------------- + * FlushRelFileNodesAllBuffers + * + * This function flushes out the buffer pool all the pages of all + * forks of the specified smgr relations. It's equivalent to + * calling FlushRelationBuffers once per fork per relation, but the + * parameter is not Relation but SMgrRelation + * -------------------------------------------------------------------- + */ +void +FlushRelFileNodesAllBuffers(SMgrRelation *smgrs, int nrels) +{ + int i; + SMgrSortArray *srels; + bool use_bsearch; + + if (nrels == 0) + return; + + /* fill-in array for qsort */ + srels = palloc(sizeof(SMgrSortArray) * nrels); + + for (i = 0 ; i < nrels ; i++) + { + Assert (!RelFileNodeBackendIsTemp(smgrs[i]->smgr_rnode)); + + srels[i].rnode = smgrs[i]->smgr_rnode.node; + srels[i].srel = smgrs[i]; + } + + /* + * Save the bsearch overhead for low number of relations to + * sync. See DropRelFileNodesAllBuffers for details. The name DROP_* + * is for historical reasons. + */ + use_bsearch = nrels > DROP_RELS_BSEARCH_THRESHOLD; + + /* sort the list of SMgrRelations if necessary */ + if (use_bsearch) + pg_qsort(srels, nrels, sizeof(SMgrSortArray), rnode_comparator); + + /* Make sure we can handle the pin inside the loop */ + ResourceOwnerEnlargeBuffers(CurrentResourceOwner); + + for (i = 0; i < NBuffers; i++) + { + SMgrSortArray *srelent = NULL; + BufferDesc *bufHdr = GetBufferDescriptor(i); + uint32 buf_state; + + /* + * As in DropRelFileNodeBuffers, an unlocked precheck should be safe + * and saves some cycles. + */ + + if (!use_bsearch) + { + int j; + + for (j = 0; j < nrels; j++) + { + if (RelFileNodeEquals(bufHdr->tag.rnode, srels[j].rnode)) + { + srelent = &srels[j]; + break; + } + } + + } + else + { + srelent = bsearch((const void *) &(bufHdr->tag.rnode), + srels, nrels, sizeof(SMgrSortArray), + rnode_comparator); + } + + /* buffer doesn't belong to any of the given relfilenodes; skip it */ + if (srelent == NULL) + continue; + + /* Ensure there's a free array slot for PinBuffer_Locked */ + ReservePrivateRefCountEntry(); + + buf_state = LockBufHdr(bufHdr); + if (RelFileNodeEquals(bufHdr->tag.rnode, srelent->rnode) && + (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY)) + { + PinBuffer_Locked(bufHdr); + LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED); + FlushBuffer(bufHdr, srelent->srel); + LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); + UnpinBuffer(bufHdr, true); + } + else + UnlockBufHdr(bufHdr, buf_state); + } + + pfree(srels); +} + /* --------------------------------------------------------------------- * FlushDatabaseBuffers * diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index b50c69b438..f79f2df40f 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -388,6 +388,43 @@ smgrdounlink(SMgrRelation reln, bool isRedo) smgrsw[which].smgr_unlink(rnode, InvalidForkNumber, isRedo); } +/* + * smgrdosyncall() -- Immediately sync all forks of all given relations + * + * All forks of all given relations are syncd out to the store. + * + * This is equivalent to flusing all buffers FlushRelationBuffers for each + * smgr relation then calling smgrimmedsync for all forks of each smgr + * relation, but it's significantly quicker so should be preferred when + * possible. + */ +void +smgrdosyncall(SMgrRelation *rels, int nrels) +{ + int i = 0; + ForkNumber forknum; + + if (nrels == 0) + return; + + /* We need to flush all buffers for the relations before sync. */ + FlushRelFileNodesAllBuffers(rels, nrels); + + /* + * Sync the physical file(s). + */ + for (i = 0; i < nrels; i++) + { + int which = rels[i]->smgr_which; + + for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) + { + if (smgrsw[which].smgr_exists(rels[i], forknum)) + smgrsw[which].smgr_immedsync(rels[i], forknum); + } + } +} + /* * smgrdounlinkall() -- Immediately unlink all forks of all given relations * @@ -469,7 +506,6 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo) pfree(rnodes); } - /* * smgrextend() -- Add a new block to a file. * diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 8cd1cf25d9..3f85e8c6fe 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -195,6 +195,7 @@ extern void FlushRelationBuffers(Relation rel); extern void FlushDatabaseBuffers(Oid dbid); extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock); +extern void FlushRelFileNodesAllBuffers(struct SMgrRelationData **smgrs, int nrels); extern void DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes); extern void DropDatabaseBuffers(Oid dbid); diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 1543d8d870..31a5ecd059 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -89,6 +89,7 @@ extern void smgrcloseall(void); extern void smgrclosenode(RelFileNodeBackend rnode); extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo); extern void smgrdounlink(SMgrRelation reln, bool isRedo); +extern void smgrdosyncall(SMgrRelation *rels, int nrels); extern void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo); extern void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync); -- 2.23.0
pgsql-hackers by date: