From c98ece2fa6ca247a52c7f76d2d1999cc1683f34a Mon Sep 17 00:00:00 2001
From: Thomas Munro
Date: Sat, 28 Jul 2018 01:25:12 +1200
Subject: [PATCH] Cache file sizes to avoid lseek() calls.

Use a shared invalidation counter to allow md.c to cache file sizes for
databases that reach a steady size.

MdShared stays NULL in any process that has not run mdinit2(), so every
user of the shared counter checks for that, and nothing is cached while
no counter is available to invalidate it.

XXX WIP -- highly experimental, there may be much better ideas than this,
and the memory synchronisation may not be strong enough! Also smgrinit2
isn't a good name, obv.
---
 Review note: hunk offsets and the diffstat were recomputed by hand after
 the MdShared NULL checks were added; the md.c blob id on its "index" line
 was not regenerated.

 src/backend/storage/smgr/md.c     | 59 ++++++++++++++++++++++++++++++-
 src/backend/storage/smgr/smgr.c   | 15 ++++++++
 src/backend/utils/init/postinit.c |  2 ++
 src/include/storage/smgr.h        |  2 ++
 4 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 86013a5c8b..38f659afd0 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -110,6 +110,8 @@ typedef struct _MdfdVec
 {
 	File		mdfd_vfd;		/* fd number in fd.c's pool */
 	BlockNumber mdfd_segno;		/* segment number, from 0 */
+	BlockNumber nblocks;		/* cached version of number of blocks */
+	uint32		relsize_change_counter; /* used for invalidation */
 } MdfdVec;
 
 static MemoryContext MdCxt;		/* context for all MdfdVec objects */
@@ -198,6 +200,13 @@ static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forkno,
 static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum,
 		   MdfdVec *seg);
 
+typedef struct MdSharedData
+{
+	/* XXX could have an array of these, and use rel OID % nelements? */
+	pg_atomic_uint32 relsize_change_counter;
+} MdSharedData;
+
+static MdSharedData *MdShared;	/* NULL until mdinit2() has run */
 
 /*
  *	mdinit() -- Initialize private state for magnetic disk storage manager.
@@ -244,6 +253,21 @@ mdinit(void)
 	}
 }
 
+/*
+ *	mdinit2() -- Attach to, or create, md.c's shared memory state.
+ *
+ * Runs via smgrinit2(), after shared memory is initialized.
+ */
+void
+mdinit2(void)
+{
+	bool		found;
+
+	MdShared = ShmemInitStruct("MdShared", sizeof(MdSharedData), &found);
+	if (!found)
+		pg_atomic_init_u32(&MdShared->relsize_change_counter, 0);
+}
+
 /*
  * In archive recovery, we rely on checkpointer to do fsyncs, but we will have
  * already created the pendingOpsTable during initialization of the startup
@@ -538,6 +562,9 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 						nbytes, BLCKSZ, blocknum),
 				 errhint("Check free disk space.")));
 	}
+	/* MdShared is NULL if this process has not run mdinit2(). */
+	if (MdShared)
+		pg_atomic_fetch_add_u32(&MdShared->relsize_change_counter, 1);
 
 	if (!skipFsync && !SmgrIsTemp(reln))
 		register_dirty_segment(reln, forknum, v);
@@ -600,6 +627,8 @@ mdopen(SMgrRelation reln, ForkNumber forknum, int behavior)
 	mdfd = &reln->md_seg_fds[forknum][0];
 	mdfd->mdfd_vfd = fd;
 	mdfd->mdfd_segno = 0;
+	mdfd->nblocks = InvalidBlockNumber;
+	mdfd->relsize_change_counter = 0;
 
 	Assert(_mdnblocks(reln, forknum, mdfd) <= ((BlockNumber) RELSEG_SIZE));
 
@@ -986,6 +1015,9 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 		}
 		curopensegs--;
 	}
+	/* MdShared is NULL if this process has not run mdinit2(). */
+	if (MdShared)
+		pg_atomic_fetch_add_u32(&MdShared->relsize_change_counter, 1);
 }
 
 /*
@@ -1950,8 +1982,24 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 static BlockNumber
 _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 {
+	uint32		relsize_change_counter = 0;
+	BlockNumber result;
 	off_t		len;
 
+	if (MdShared)
+	{
+		/*
+		 * When acquiring a snapshot, we executed a memory barrier. Likewise
+		 * for a relation extension lock. Therefore we must be able to see
+		 * a value of relsize_change_counter fresh enough for our purposes.
+		 * XXX Right? Hmm.
+		 */
+		relsize_change_counter = pg_atomic_read_u32(&MdShared->relsize_change_counter);
+		if (seg->nblocks != InvalidBlockNumber &&
+			seg->relsize_change_counter == relsize_change_counter)
+			return seg->nblocks;
+	}
+
 	len = FileSize(seg->mdfd_vfd);
 	if (len < 0)
 		ereport(ERROR,
@@ -1959,5 +2007,14 @@ _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 				 errmsg("could not seek to end of file \"%s\": %m",
 						FilePathName(seg->mdfd_vfd))));
 	/* note that this calculation will ignore any partial block at EOF */
-	return (BlockNumber) (len / BLCKSZ);
+	result = (BlockNumber) (len / BLCKSZ);
+
+	/* Cache only when there is a shared counter to invalidate the entry. */
+	if (MdShared)
+	{
+		seg->nblocks = result;
+		seg->relsize_change_counter = relsize_change_counter;
+	}
+
+	return result;
 }
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 189342ef86..06cd70cf03 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -38,6 +38,7 @@
 typedef struct f_smgr
 {
 	void		(*smgr_init) (void);	/* may be NULL */
+	void		(*smgr_init2) (void);	/* may be NULL */
 	void		(*smgr_shutdown) (void);	/* may be NULL */
 	void		(*smgr_close) (SMgrRelation reln, ForkNumber forknum);
 	void		(*smgr_create) (SMgrRelation reln, ForkNumber forknum,
@@ -69,6 +70,7 @@ static const f_smgr smgrsw[] = {
 	/* magnetic disk */
 	{
 		.smgr_init = mdinit,
+		.smgr_init2 = mdinit2,
 		.smgr_shutdown = NULL,
 		.smgr_close = mdclose,
 		.smgr_create = mdcreate,
@@ -128,6 +130,19 @@ smgrinit(void)
 	on_proc_exit(smgrshutdown, 0);
 }
 
+/* Run after shared memory is initialized. */
+void
+smgrinit2(void)
+{
+	int			i;
+
+	for (i = 0; i < NSmgr; i++)
+	{
+		if (smgrsw[i].smgr_init2)
+			smgrsw[i].smgr_init2();
+	}
+}
+
 /*
  * on_proc_exit hook for smgr cleanup during backend shutdown
  */
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 4f1d2a0d28..3813c64ddb 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -594,6 +594,8 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
 
 	elog(DEBUG3, "InitPostgres");
 
+	smgrinit2();
+
 	/*
 	 * Add my PGPROC struct to the ProcArray.
 	 *
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index c843bbc969..45e05955b5 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -81,6 +81,7 @@ typedef SMgrRelationData *SMgrRelation;
 	RelFileNodeBackendIsTemp((smgr)->smgr_rnode)
 
 extern void smgrinit(void);
+extern void smgrinit2(void);
 extern SMgrRelation smgropen(RelFileNode rnode, BackendId backend);
 extern bool smgrexists(SMgrRelation reln, ForkNumber forknum);
 extern void smgrsetowner(SMgrRelation *owner, SMgrRelation reln);
@@ -116,6 +117,7 @@ extern void AtEOXact_SMgr(void);
 
 /* in md.c */
 extern void mdinit(void);
+extern void mdinit2(void);
 extern void mdclose(SMgrRelation reln, ForkNumber forknum);
 extern void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
 extern bool mdexists(SMgrRelation reln, ForkNumber forknum);
-- 
2.19.1