buffer manager refactoring to isolate ARC algorithm - Mailing list pgsql-patches

From Tom Lane
Subject buffer manager refactoring to isolate ARC algorithm
Date
Msg-id 3097.1107462271@sss.pgh.pa.us
Whole thread Raw
List pgsql-patches
I'm planning to apply the attached patch to hide the ARC-related data
structures in freelist.c.  There are no data structure or algorithm
changes, just removal of ARC details from the shared header
buf_internals.h, plus minor refactoring to hide the knowledge that ARC
wants a twice-normal-size buffer lookup hashtable.  This should allow us
to make the ARC-or-not changes within freelist.c only.

I did leave the "bufNext" fields of BufferDesc as-is.  Strictly speaking
this list is an internal data structure of freelist.c, but moving these
fields out of BufferDesc would require a larger change than seems
worthwhile, especially seeing that an LRU algorithm could make use of
the same list.

Any objections?

            regards, tom lane

*** src/backend/storage/buffer/buf_init.c.orig    Fri Dec 31 17:46:08 2004
--- src/backend/storage/buffer/buf_init.c    Thu Feb  3 14:51:18 2005
***************
*** 73,79 ****
   *        aborts, it should only unpin the buffers exactly the number of times it
   *        has pinned them, so that it will not blow away buffers of another
   *        backend.
-  *
   */


--- 73,78 ----
***************
*** 120,133 ****
          block = BufferBlocks;

          /*
!          * link the buffers into a single linked list. This will become
!          * the LIFO list of unused buffers returned by
!          * StrategyGetBuffer().
           */
          for (i = 0; i < NBuffers; block += BLCKSZ, buf++, i++)
          {
              Assert(ShmemIsValid((unsigned long) block));

              buf->bufNext = i + 1;

              CLEAR_BUFFERTAG(buf->tag);
--- 119,135 ----
          block = BufferBlocks;

          /*
!          * Initialize all the buffer headers.
           */
          for (i = 0; i < NBuffers; block += BLCKSZ, buf++, i++)
          {
              Assert(ShmemIsValid((unsigned long) block));

+             /*
+              * The bufNext fields link together all totally-unused buffers.
+              * Subsequent management of this list is done by
+              * StrategyGetBuffer().
+              */
              buf->bufNext = i + 1;

              CLEAR_BUFFERTAG(buf->tag);
***************
*** 142,148 ****
              buf->wait_backend_id = 0;
          }

!         /* Correct last entry */
          BufferDescriptors[NBuffers - 1].bufNext = -1;

          LWLockRelease(BufMgrLock);
--- 144,150 ----
              buf->wait_backend_id = 0;
          }

!         /* Correct last entry of linked list */
          BufferDescriptors[NBuffers - 1].bufNext = -1;

          LWLockRelease(BufMgrLock);
***************
*** 178,184 ****

      /*
       * Convert shmem offsets into addresses as seen by this process. This
!      * is just to speed up the BufferGetBlock() macro.
       */
      for (i = 0; i < NBuffers; i++)
          BufferBlockPointers[i] = (Block) MAKE_PTR(BufferDescriptors[i].data);
--- 180,187 ----

      /*
       * Convert shmem offsets into addresses as seen by this process. This
!      * is just to speed up the BufferGetBlock() macro.  It is OK to do this
!      * without any lock since the data pointers never change.
       */
      for (i = 0; i < NBuffers; i++)
          BufferBlockPointers[i] = (Block) MAKE_PTR(BufferDescriptors[i].data);
***************
*** 201,214 ****
      /* size of data pages */
      size += NBuffers * MAXALIGN(BLCKSZ);

!     /* size of buffer hash table */
!     size += hash_estimate_size(NBuffers * 2, sizeof(BufferLookupEnt));
!
!     /* size of the shared replacement strategy control block */
!     size += MAXALIGN(sizeof(BufferStrategyControl));
!
!     /* size of the ARC directory blocks */
!     size += MAXALIGN(NBuffers * 2 * sizeof(BufferStrategyCDB));

      return size;
  }
--- 204,211 ----
      /* size of data pages */
      size += NBuffers * MAXALIGN(BLCKSZ);

!     /* size of stuff controlled by freelist.c */
!     size += StrategyShmemSize();

      return size;
  }
*** src/backend/storage/buffer/buf_table.c.orig    Fri Dec 31 17:46:08 2004
--- src/backend/storage/buffer/buf_table.c    Thu Feb  3 14:40:17 2005
***************
*** 1,11 ****
  /*-------------------------------------------------------------------------
   *
   * buf_table.c
!  *      routines for finding buffers in the buffer pool.
   *
!  * NOTE: these days, what this table actually provides is a mapping from
!  * BufferTags to CDB indexes, not directly to buffers.    The function names
!  * are thus slight misnomers.
   *
   * Note: all routines in this file assume that the BufMgrLock is held
   * by the caller, so no synchronization is needed.
--- 1,11 ----
  /*-------------------------------------------------------------------------
   *
   * buf_table.c
!  *      routines for mapping BufferTags to buffer indexes.
   *
!  * NOTE: this module is called only by freelist.c, and the "buffer IDs"
!  * it deals with are whatever freelist.c needs them to be; they may not be
!  * directly equivalent to Buffer numbers.
   *
   * Note: all routines in this file assume that the BufMgrLock is held
   * by the caller, so no synchronization is needed.
***************
*** 26,37 ****
  #include "storage/bufmgr.h"


  static HTAB *SharedBufHash;


  /*
   * Initialize shmem hash table for mapping buffers
!  *        size is the desired hash table size (2*NBuffers for ARC algorithm)
   */
  void
  InitBufTable(int size)
--- 26,54 ----
  #include "storage/bufmgr.h"


+ /* entry for buffer lookup hashtable */
+ typedef struct
+ {
+     BufferTag    key;            /* Tag of a disk page */
+     int            id;                /* Associated buffer ID */
+ } BufferLookupEnt;
+
  static HTAB *SharedBufHash;


  /*
+  * Estimate space needed for mapping hashtable
+  *        size is the desired hash table size (possibly more than NBuffers)
+  */
+ int
+ BufTableShmemSize(int size)
+ {
+     return hash_estimate_size(size, sizeof(BufferLookupEnt));
+ }
+
+ /*
   * Initialize shmem hash table for mapping buffers
!  *        size is the desired hash table size (possibly more than NBuffers)
   */
  void
  InitBufTable(int size)
***************
*** 56,62 ****

  /*
   * BufTableLookup
!  *        Lookup the given BufferTag; return CDB index, or -1 if not found
   */
  int
  BufTableLookup(BufferTag *tagPtr)
--- 73,79 ----

  /*
   * BufTableLookup
!  *        Lookup the given BufferTag; return buffer ID, or -1 if not found
   */
  int
  BufTableLookup(BufferTag *tagPtr)
***************
*** 76,85 ****

  /*
   * BufTableInsert
!  *        Insert a hashtable entry for given tag and CDB index
   */
  void
! BufTableInsert(BufferTag *tagPtr, int cdb_id)
  {
      BufferLookupEnt *result;
      bool        found;
--- 93,102 ----

  /*
   * BufTableInsert
!  *        Insert a hashtable entry for given tag and buffer ID
   */
  void
! BufTableInsert(BufferTag *tagPtr, int buf_id)
  {
      BufferLookupEnt *result;
      bool        found;
***************
*** 92,106 ****
                  (errcode(ERRCODE_OUT_OF_MEMORY),
                   errmsg("out of shared memory")));

!     if (found)                    /* found something else in the table? */
          elog(ERROR, "shared buffer hash table corrupted");

!     result->id = cdb_id;
  }

  /*
   * BufTableDelete
!  *        Delete the hashtable entry for given tag
   */
  void
  BufTableDelete(BufferTag *tagPtr)
--- 109,123 ----
                  (errcode(ERRCODE_OUT_OF_MEMORY),
                   errmsg("out of shared memory")));

!     if (found)                    /* found something already in the table? */
          elog(ERROR, "shared buffer hash table corrupted");

!     result->id = buf_id;
  }

  /*
   * BufTableDelete
!  *        Delete the hashtable entry for given tag (which must exist)
   */
  void
  BufTableDelete(BufferTag *tagPtr)
*** src/backend/storage/buffer/freelist.c.orig    Fri Dec 31 17:46:08 2004
--- src/backend/storage/buffer/freelist.c    Thu Feb  3 14:45:50 2005
***************
*** 25,30 ****
--- 25,75 ----
  #include "storage/bufmgr.h"


+ /*
+  * Definitions for the buffer replacement strategy
+  */
+ #define STRAT_LIST_UNUSED    (-1)
+ #define STRAT_LIST_B1        0
+ #define STRAT_LIST_T1        1
+ #define STRAT_LIST_T2        2
+ #define STRAT_LIST_B2        3
+ #define STRAT_NUM_LISTS        4
+
+ /*
+  * The Cache Directory Block (CDB) of the Adaptive Replacement Cache (ARC)
+  */
+ typedef struct
+ {
+     int            prev;            /* list links */
+     int            next;
+     short        list;            /* ID of list it is currently in */
+     bool        t1_vacuum;        /* t => present only because of VACUUM */
+     TransactionId t1_xid;        /* the xid this entry went onto T1 */
+     BufferTag    buf_tag;        /* page identifier */
+     int            buf_id;            /* currently assigned data buffer, or -1 */
+ } BufferStrategyCDB;
+
+ /*
+  * The shared ARC control information.
+  */
+ typedef struct
+ {
+     int            target_T1_size; /* What T1 size are we aiming for */
+     int            listUnusedCDB;    /* All unused StrategyCDB */
+     int            listHead[STRAT_NUM_LISTS];        /* ARC lists B1, T1, T2
+                                                  * and B2 */
+     int            listTail[STRAT_NUM_LISTS];
+     int            listSize[STRAT_NUM_LISTS];
+     Buffer        listFreeBuffers;    /* List of unused buffers */
+
+     long        num_lookup;        /* Some hit statistics */
+     long        num_hit[STRAT_NUM_LISTS];
+     time_t        stat_report;
+
+     /* Array of CDB's starts here */
+     BufferStrategyCDB cdb[1];    /* VARIABLE SIZE ARRAY */
+ } BufferStrategyControl;
+
  /* GUC variable: time in seconds between statistics reports */
  int            DebugSharedBuffers = 0;

***************
*** 811,816 ****
--- 856,883 ----
      return num_buffer_dirty;
  }

+
+ /*
+  * StrategyShmemSize
+  *
+  * estimate the size of shared memory used by the freelist-related structures.
+  */
+ int
+ StrategyShmemSize(void)
+ {
+     int            size = 0;
+
+     /* size of CDB lookup hash table */
+     size += BufTableShmemSize(NBuffers * 2);
+
+     /* size of the shared replacement strategy control block */
+     size += MAXALIGN(sizeof(BufferStrategyControl));
+
+     /* size of the ARC directory blocks */
+     size += MAXALIGN(NBuffers * 2 * sizeof(BufferStrategyCDB));
+
+     return size;
+ }

  /*
   * StrategyInitialize -- initialize the buffer cache replacement
*** src/include/storage/buf_internals.h.orig    Fri Dec 31 17:47:02 2004
--- src/include/storage/buf_internals.h    Thu Feb  3 14:51:11 2005
***************
*** 17,24 ****

  #include "storage/backendid.h"
  #include "storage/buf.h"
- #include "storage/lmgr.h"
  #include "storage/lwlock.h"


  /*
--- 17,25 ----

  #include "storage/backendid.h"
  #include "storage/buf.h"
  #include "storage/lwlock.h"
+ #include "storage/shmem.h"
+ #include "utils/rel.h"


  /*
***************
*** 40,49 ****
   * Buffer tag identifies which disk block the buffer contains.
   *
   * Note: the BufferTag data must be sufficient to determine where to write the
!  * block, even during a "blind write" with no relcache entry.  It's possible
!  * that the backend flushing the buffer doesn't even believe the relation is
!  * visible yet (its xact may have started before the xact that created the
!  * rel).  The storage manager must be able to cope anyway.
   *
   * Note: if there's any pad bytes in the struct, INIT_BUFFERTAG will have
   * to be fixed to zero them, since this struct is used as a hash key.
--- 41,50 ----
   * Buffer tag identifies which disk block the buffer contains.
   *
   * Note: the BufferTag data must be sufficient to determine where to write the
!  * block, without reference to pg_class or pg_tablespace entries.  It's
!  * possible that the backend flushing the buffer doesn't even believe the
!  * relation is visible yet (its xact may have started before the xact that
!  * created the rel).  The storage manager must be able to cope anyway.
   *
   * Note: if there's any pad bytes in the struct, INIT_BUFFERTAG will have
   * to be fixed to zero them, since this struct is used as a hash key.
***************
*** 107,164 ****

  #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)

- /* entry for buffer lookup hashtable */
- typedef struct
- {
-     BufferTag    key;            /* Tag of a disk page */
-     int            id;                /* CDB id of associated CDB */
- } BufferLookupEnt;

! /*
!  * Definitions for the buffer replacement strategy
!  */
! #define STRAT_LIST_UNUSED    (-1)
! #define STRAT_LIST_B1        0
! #define STRAT_LIST_T1        1
! #define STRAT_LIST_T2        2
! #define STRAT_LIST_B2        3
! #define STRAT_NUM_LISTS        4
!
! /*
!  * The Cache Directory Block (CDB) of the Adaptive Replacement Cache (ARC)
!  */
! typedef struct
! {
!     int            prev;            /* list links */
!     int            next;
!     short        list;            /* ID of list it is currently in */
!     bool        t1_vacuum;        /* t => present only because of VACUUM */
!     TransactionId t1_xid;        /* the xid this entry went onto T1 */
!     BufferTag    buf_tag;        /* page identifier */
!     int            buf_id;            /* currently assigned data buffer, or -1 */
! } BufferStrategyCDB;
!
! /*
!  * The shared ARC control information.
!  */
! typedef struct
! {
!     int            target_T1_size; /* What T1 size are we aiming for */
!     int            listUnusedCDB;    /* All unused StrategyCDB */
!     int            listHead[STRAT_NUM_LISTS];        /* ARC lists B1, T1, T2
!                                                  * and B2 */
!     int            listTail[STRAT_NUM_LISTS];
!     int            listSize[STRAT_NUM_LISTS];
!     Buffer        listFreeBuffers;    /* List of unused buffers */
!
!     long        num_lookup;        /* Some hit statistics */
!     long        num_hit[STRAT_NUM_LISTS];
!     time_t        stat_report;
!
!     /* Array of CDB's starts here */
!     BufferStrategyCDB cdb[1];    /* VARIABLE SIZE ARRAY */
! } BufferStrategyControl;


  /* counters in buf_init.c */
  extern long int ReadBufferCount;
--- 108,119 ----

  #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)


! /* in bufmgr.c */
! extern BufferDesc *BufferDescriptors;

+ /* in localbuf.c */
+ extern BufferDesc *LocalBufferDescriptors;

  /* counters in buf_init.c */
  extern long int ReadBufferCount;
***************
*** 170,180 ****


  /*
!  * Bufmgr Interface:
   */

- /* Internal routines: only called by bufmgr */
-
  /* freelist.c */
  extern BufferDesc *StrategyBufferLookup(BufferTag *tagPtr, bool recheck,
                       int *cdb_found_index);
--- 125,133 ----


  /*
!  * Internal routines: only called by bufmgr
   */

  /* freelist.c */
  extern BufferDesc *StrategyBufferLookup(BufferTag *tagPtr, bool recheck,
                       int *cdb_found_index);
***************
*** 185,204 ****
  extern void StrategyHintVacuum(bool vacuum_active);
  extern int StrategyDirtyBufferList(BufferDesc **buffers, BufferTag *buftags,
                          int max_buffers);
  extern void StrategyInitialize(bool init);

  /* buf_table.c */
  extern void InitBufTable(int size);
  extern int    BufTableLookup(BufferTag *tagPtr);
! extern void BufTableInsert(BufferTag *tagPtr, int cdb_id);
  extern void BufTableDelete(BufferTag *tagPtr);

- /* bufmgr.c */
- extern BufferDesc *BufferDescriptors;
-
  /* localbuf.c */
- extern BufferDesc *LocalBufferDescriptors;
-
  extern BufferDesc *LocalBufferAlloc(Relation reln, BlockNumber blockNum,
                   bool *foundPtr);
  extern void WriteLocalBuffer(Buffer buffer, bool release);
--- 138,154 ----
  extern void StrategyHintVacuum(bool vacuum_active);
  extern int StrategyDirtyBufferList(BufferDesc **buffers, BufferTag *buftags,
                          int max_buffers);
+ extern int    StrategyShmemSize(void);
  extern void StrategyInitialize(bool init);

  /* buf_table.c */
+ extern int    BufTableShmemSize(int size);
  extern void InitBufTable(int size);
  extern int    BufTableLookup(BufferTag *tagPtr);
! extern void BufTableInsert(BufferTag *tagPtr, int buf_id);
  extern void BufTableDelete(BufferTag *tagPtr);

  /* localbuf.c */
  extern BufferDesc *LocalBufferAlloc(Relation reln, BlockNumber blockNum,
                   bool *foundPtr);
  extern void WriteLocalBuffer(Buffer buffer, bool release);

pgsql-patches by date:

Previous
From: "Ed L."
Date:
Subject: Re: dbsize patch
Next
From: Neil Conway
Date:
Subject: Re: Refactoring lock.c