buffer manager refactoring to isolate ARC algorithm - Mailing list pgsql-patches
From | Tom Lane |
---|---|
Subject | buffer manager refactoring to isolate ARC algorithm |
Date | |
Msg-id | 3097.1107462271@sss.pgh.pa.us Whole thread Raw |
List | pgsql-patches |
I'm planning to apply the attached patch to hide the ARC-related data structures in freelist.c. There are no data structure or algorithm changes, just removal of ARC details from the shared header buf_internals.h, plus minor refactoring to hide the knowledge that ARC wants a twice-normal-size buffer lookup hashtable. This should allow us to make the ARC-or-not changes within freelist.c only. I did leave the "bufNext" fields of BufferDesc as-is. Strictly speaking this list is an internal data structure of freelist.c, but moving these fields out of BufferDesc would require a larger change than seems worthwhile, especially seeing that an LRU algorithm could make use of the same list. Any objections? regards, tom lane *** src/backend/storage/buffer/buf_init.c.orig Fri Dec 31 17:46:08 2004 --- src/backend/storage/buffer/buf_init.c Thu Feb 3 14:51:18 2005 *************** *** 73,79 **** * aborts, it should only unpin the buffers exactly the number of times it * has pinned them, so that it will not blow away buffers of another * backend. - * */ --- 73,78 ---- *************** *** 120,133 **** block = BufferBlocks; /* ! * link the buffers into a single linked list. This will become ! * the LIFO list of unused buffers returned by ! * StrategyGetBuffer(). */ for (i = 0; i < NBuffers; block += BLCKSZ, buf++, i++) { Assert(ShmemIsValid((unsigned long) block)); buf->bufNext = i + 1; CLEAR_BUFFERTAG(buf->tag); --- 119,135 ---- block = BufferBlocks; /* ! * Initialize all the buffer headers. */ for (i = 0; i < NBuffers; block += BLCKSZ, buf++, i++) { Assert(ShmemIsValid((unsigned long) block)); + /* + * The bufNext fields link together all totally-unused buffers. + * Subsequent management of this list is done by + * StrategyGetBuffer(). + */ buf->bufNext = i + 1; CLEAR_BUFFERTAG(buf->tag); *************** *** 142,148 **** buf->wait_backend_id = 0; } ! /* Correct last entry */ BufferDescriptors[NBuffers - 1].bufNext = -1; LWLockRelease(BufMgrLock); --- 144,150 ---- buf->wait_backend_id = 0; } ! /* Correct last entry of linked list */ BufferDescriptors[NBuffers - 1].bufNext = -1; LWLockRelease(BufMgrLock); *************** *** 178,184 **** /* * Convert shmem offsets into addresses as seen by this process. This ! * is just to speed up the BufferGetBlock() macro. */ for (i = 0; i < NBuffers; i++) BufferBlockPointers[i] = (Block) MAKE_PTR(BufferDescriptors[i].data); --- 180,187 ---- /* * Convert shmem offsets into addresses as seen by this process. This ! * is just to speed up the BufferGetBlock() macro. It is OK to do this ! * without any lock since the data pointers never change. */ for (i = 0; i < NBuffers; i++) BufferBlockPointers[i] = (Block) MAKE_PTR(BufferDescriptors[i].data); *************** *** 201,214 **** /* size of data pages */ size += NBuffers * MAXALIGN(BLCKSZ); ! /* size of buffer hash table */ ! size += hash_estimate_size(NBuffers * 2, sizeof(BufferLookupEnt)); ! ! /* size of the shared replacement strategy control block */ ! size += MAXALIGN(sizeof(BufferStrategyControl)); ! ! /* size of the ARC directory blocks */ ! size += MAXALIGN(NBuffers * 2 * sizeof(BufferStrategyCDB)); return size; } --- 204,211 ---- /* size of data pages */ size += NBuffers * MAXALIGN(BLCKSZ); ! /* size of stuff controlled by freelist.c */ ! size += StrategyShmemSize(); return size; } *** src/backend/storage/buffer/buf_table.c.orig Fri Dec 31 17:46:08 2004 --- src/backend/storage/buffer/buf_table.c Thu Feb 3 14:40:17 2005 *************** *** 1,11 **** /*------------------------------------------------------------------------- * * buf_table.c ! * routines for finding buffers in the buffer pool. * ! * NOTE: these days, what this table actually provides is a mapping from ! * BufferTags to CDB indexes, not directly to buffers. The function names ! * are thus slight misnomers. * * Note: all routines in this file assume that the BufMgrLock is held * by the caller, so no synchronization is needed. --- 1,11 ---- /*------------------------------------------------------------------------- * * buf_table.c ! * routines for mapping BufferTags to buffer indexes. * ! * NOTE: this module is called only by freelist.c, and the "buffer IDs" ! * it deals with are whatever freelist.c needs them to be; they may not be ! * directly equivalent to Buffer numbers. * * Note: all routines in this file assume that the BufMgrLock is held * by the caller, so no synchronization is needed. *************** *** 26,37 **** #include "storage/bufmgr.h" static HTAB *SharedBufHash; /* * Initialize shmem hash table for mapping buffers ! * size is the desired hash table size (2*NBuffers for ARC algorithm) */ void InitBufTable(int size) --- 26,54 ---- #include "storage/bufmgr.h" + /* entry for buffer lookup hashtable */ + typedef struct + { + BufferTag key; /* Tag of a disk page */ + int id; /* Associated buffer ID */ + } BufferLookupEnt; + static HTAB *SharedBufHash; /* + * Estimate space needed for mapping hashtable + * size is the desired hash table size (possibly more than NBuffers) + */ + int + BufTableShmemSize(int size) + { + return hash_estimate_size(size, sizeof(BufferLookupEnt)); + } + + /* * Initialize shmem hash table for mapping buffers ! * size is the desired hash table size (possibly more than NBuffers) */ void InitBufTable(int size) *************** *** 56,62 **** /* * BufTableLookup ! * Lookup the given BufferTag; return CDB index, or -1 if not found */ int BufTableLookup(BufferTag *tagPtr) --- 73,79 ---- /* * BufTableLookup ! * Lookup the given BufferTag; return buffer ID, or -1 if not found */ int BufTableLookup(BufferTag *tagPtr) *************** *** 76,85 **** /* * BufTableInsert ! * Insert a hashtable entry for given tag and CDB index */ void ! BufTableInsert(BufferTag *tagPtr, int cdb_id) { BufferLookupEnt *result; bool found; --- 93,102 ---- /* * BufTableInsert ! * Insert a hashtable entry for given tag and buffer ID */ void ! BufTableInsert(BufferTag *tagPtr, int buf_id) { BufferLookupEnt *result; bool found; *************** *** 92,106 **** (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of shared memory"))); ! if (found) /* found something else in the table? */ elog(ERROR, "shared buffer hash table corrupted"); ! result->id = cdb_id; } /* * BufTableDelete ! * Delete the hashtable entry for given tag */ void BufTableDelete(BufferTag *tagPtr) --- 109,123 ---- (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of shared memory"))); ! if (found) /* found something already in the table? */ elog(ERROR, "shared buffer hash table corrupted"); ! result->id = buf_id; } /* * BufTableDelete ! * Delete the hashtable entry for given tag (which must exist) */ void BufTableDelete(BufferTag *tagPtr) *** src/backend/storage/buffer/freelist.c.orig Fri Dec 31 17:46:08 2004 --- src/backend/storage/buffer/freelist.c Thu Feb 3 14:45:50 2005 *************** *** 25,30 **** --- 25,75 ---- #include "storage/bufmgr.h" + /* + * Definitions for the buffer replacement strategy + */ + #define STRAT_LIST_UNUSED (-1) + #define STRAT_LIST_B1 0 + #define STRAT_LIST_T1 1 + #define STRAT_LIST_T2 2 + #define STRAT_LIST_B2 3 + #define STRAT_NUM_LISTS 4 + + /* + * The Cache Directory Block (CDB) of the Adaptive Replacement Cache (ARC) + */ + typedef struct + { + int prev; /* list links */ + int next; + short list; /* ID of list it is currently in */ + bool t1_vacuum; /* t => present only because of VACUUM */ + TransactionId t1_xid; /* the xid this entry went onto T1 */ + BufferTag buf_tag; /* page identifier */ + int buf_id; /* currently assigned data buffer, or -1 */ + } BufferStrategyCDB; + + /* + * The shared ARC control information. + */ + typedef struct + { + int target_T1_size; /* What T1 size are we aiming for */ + int listUnusedCDB; /* All unused StrategyCDB */ + int listHead[STRAT_NUM_LISTS]; /* ARC lists B1, T1, T2 + * and B2 */ + int listTail[STRAT_NUM_LISTS]; + int listSize[STRAT_NUM_LISTS]; + Buffer listFreeBuffers; /* List of unused buffers */ + + long num_lookup; /* Some hit statistics */ + long num_hit[STRAT_NUM_LISTS]; + time_t stat_report; + + /* Array of CDB's starts here */ + BufferStrategyCDB cdb[1]; /* VARIABLE SIZE ARRAY */ + } BufferStrategyControl; + /* GUC variable: time in seconds between statistics reports */ int DebugSharedBuffers = 0; *************** *** 811,816 **** --- 856,883 ---- return num_buffer_dirty; } + + /* + * StrategyShmemSize + * + * estimate the size of shared memory used by the freelist-related structures. + */ + int + StrategyShmemSize(void) + { + int size = 0; + + /* size of CDB lookup hash table */ + size += BufTableShmemSize(NBuffers * 2); + + /* size of the shared replacement strategy control block */ + size += MAXALIGN(sizeof(BufferStrategyControl)); + + /* size of the ARC directory blocks */ + size += MAXALIGN(NBuffers * 2 * sizeof(BufferStrategyCDB)); + + return size; + } /* * StrategyInitialize -- initialize the buffer cache replacement *** src/include/storage/buf_internals.h.orig Fri Dec 31 17:47:02 2004 --- src/include/storage/buf_internals.h Thu Feb 3 14:51:11 2005 *************** *** 17,24 **** #include "storage/backendid.h" #include "storage/buf.h" - #include "storage/lmgr.h" #include "storage/lwlock.h" /* --- 17,25 ---- #include "storage/backendid.h" #include "storage/buf.h" #include "storage/lwlock.h" + #include "storage/shmem.h" + #include "utils/rel.h" /* *************** *** 40,49 **** * Buffer tag identifies which disk block the buffer contains. * * Note: the BufferTag data must be sufficient to determine where to write the ! * block, even during a "blind write" with no relcache entry. It's possible ! * that the backend flushing the buffer doesn't even believe the relation is ! * visible yet (its xact may have started before the xact that created the ! * rel). The storage manager must be able to cope anyway. * * Note: if there's any pad bytes in the struct, INIT_BUFFERTAG will have * to be fixed to zero them, since this struct is used as a hash key. --- 41,50 ---- * Buffer tag identifies which disk block the buffer contains. * * Note: the BufferTag data must be sufficient to determine where to write the ! * block, without reference to pg_class or pg_tablespace entries. It's ! * possible that the backend flushing the buffer doesn't even believe the ! * relation is visible yet (its xact may have started before the xact that ! * created the rel). The storage manager must be able to cope anyway. * * Note: if there's any pad bytes in the struct, INIT_BUFFERTAG will have * to be fixed to zero them, since this struct is used as a hash key. *************** *** 107,164 **** #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1) - /* entry for buffer lookup hashtable */ - typedef struct - { - BufferTag key; /* Tag of a disk page */ - int id; /* CDB id of associated CDB */ - } BufferLookupEnt; ! /* ! * Definitions for the buffer replacement strategy ! */ ! #define STRAT_LIST_UNUSED (-1) ! #define STRAT_LIST_B1 0 ! #define STRAT_LIST_T1 1 ! #define STRAT_LIST_T2 2 ! #define STRAT_LIST_B2 3 ! #define STRAT_NUM_LISTS 4 ! ! /* ! * The Cache Directory Block (CDB) of the Adaptive Replacement Cache (ARC) ! */ ! typedef struct ! { ! int prev; /* list links */ ! int next; ! short list; /* ID of list it is currently in */ ! bool t1_vacuum; /* t => present only because of VACUUM */ ! TransactionId t1_xid; /* the xid this entry went onto T1 */ ! BufferTag buf_tag; /* page identifier */ ! int buf_id; /* currently assigned data buffer, or -1 */ ! } BufferStrategyCDB; ! ! /* ! * The shared ARC control information. ! */ ! typedef struct ! { ! int target_T1_size; /* What T1 size are we aiming for */ ! int listUnusedCDB; /* All unused StrategyCDB */ ! int listHead[STRAT_NUM_LISTS]; /* ARC lists B1, T1, T2 ! * and B2 */ ! int listTail[STRAT_NUM_LISTS]; ! int listSize[STRAT_NUM_LISTS]; ! Buffer listFreeBuffers; /* List of unused buffers */ ! ! long num_lookup; /* Some hit statistics */ ! long num_hit[STRAT_NUM_LISTS]; ! time_t stat_report; ! ! /* Array of CDB's starts here */ ! BufferStrategyCDB cdb[1]; /* VARIABLE SIZE ARRAY */ ! } BufferStrategyControl; /* counters in buf_init.c */ extern long int ReadBufferCount; --- 108,119 ---- #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1) ! /* in bufmgr.c */ ! extern BufferDesc *BufferDescriptors; + /* in localbuf.c */ + extern BufferDesc *LocalBufferDescriptors; /* counters in buf_init.c */ extern long int ReadBufferCount; *************** *** 170,180 **** /* ! * Bufmgr Interface: */ - /* Internal routines: only called by bufmgr */ - /* freelist.c */ extern BufferDesc *StrategyBufferLookup(BufferTag *tagPtr, bool recheck, int *cdb_found_index); --- 125,133 ---- /* ! * Internal routines: only called by bufmgr */ /* freelist.c */ extern BufferDesc *StrategyBufferLookup(BufferTag *tagPtr, bool recheck, int *cdb_found_index); *************** *** 185,204 **** extern void StrategyHintVacuum(bool vacuum_active); extern int StrategyDirtyBufferList(BufferDesc **buffers, BufferTag *buftags, int max_buffers); extern void StrategyInitialize(bool init); /* buf_table.c */ extern void InitBufTable(int size); extern int BufTableLookup(BufferTag *tagPtr); ! extern void BufTableInsert(BufferTag *tagPtr, int cdb_id); extern void BufTableDelete(BufferTag *tagPtr); - /* bufmgr.c */ - extern BufferDesc *BufferDescriptors; - /* localbuf.c */ - extern BufferDesc *LocalBufferDescriptors; - extern BufferDesc *LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr); extern void WriteLocalBuffer(Buffer buffer, bool release); --- 138,154 ---- extern void StrategyHintVacuum(bool vacuum_active); extern int StrategyDirtyBufferList(BufferDesc **buffers, BufferTag *buftags, int max_buffers); + extern int StrategyShmemSize(void); extern void StrategyInitialize(bool init); /* buf_table.c */ + extern int BufTableShmemSize(int size); extern void InitBufTable(int size); extern int BufTableLookup(BufferTag *tagPtr); ! extern void BufTableInsert(BufferTag *tagPtr, int buf_id); extern void BufTableDelete(BufferTag *tagPtr); /* localbuf.c */ extern BufferDesc *LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr); extern void WriteLocalBuffer(Buffer buffer, bool release);
pgsql-patches by date: