Thread: trace_checkpoint parameter patch

trace_checkpoint parameter patch

From
Satoshi Nagayasu
Date:
Hi all,

Here is a brand new patch to log a checkpointing load information
to tune the bgwriter parameter.

When you enable trace_checkpoint parameter and process
the checkpoint, the bgwriter logs the number of
flushed buffer pages and the elapsed time.

-----------------------------------------------------------
{61}snaga@artemis:~/pgsql-snapshot% ./bin/psql -c 'checkpoint' pgbench
LOG:  CheckPoint: 1522/3072 buffer(s) flushed. CPU 0.03s/0.00u sec elapsed 1.57 sec
CHECKPOINT
{62}snaga@artemis:~/pgsql-snapshot%
-----------------------------------------------------------

I think this information is useful to configure
the bgwriter parameter to reduce the performance impact
of the checkpointing.

Any suggestion or comments?
--
NAGAYASU Satoshi <snaga@snaga.org>
diff -rc postgresql-snapshot.orig/src/backend/access/transam/xlog.c
postgresql-snapshot/src/backend/access/transam/xlog.c
*** postgresql-snapshot.orig/src/backend/access/transam/xlog.c    Fri Jun  1 00:13:01 2007
--- postgresql-snapshot/src/backend/access/transam/xlog.c    Tue Jun 12 18:37:47 2007
***************
*** 48,53 ****
--- 48,54 ----
  #include "storage/spin.h"
  #include "utils/builtins.h"
  #include "utils/pg_locale.h"
+ #include "utils/pg_rusage.h"



***************
*** 66,76 ****
  char       *XLOG_sync_method = NULL;
  const char    XLOG_sync_method_default[] = DEFAULT_SYNC_METHOD_STR;
  bool        fullPageWrites = true;
-
  #ifdef WAL_DEBUG
  bool        XLOG_DEBUG = false;
  #endif

  /*
   * XLOGfileslop is used in the code as the allowed "fuzz" in the number of
   * preallocated XLOG segments --- we try to have at least XLOGfiles advance
--- 67,78 ----
  char       *XLOG_sync_method = NULL;
  const char    XLOG_sync_method_default[] = DEFAULT_SYNC_METHOD_STR;
  bool        fullPageWrites = true;
  #ifdef WAL_DEBUG
  bool        XLOG_DEBUG = false;
  #endif

+ bool        trace_checkpoint = false;
+
  /*
   * XLOGfileslop is used in the code as the allowed "fuzz" in the number of
   * preallocated XLOG segments --- we try to have at least XLOGfiles advance
***************
*** 5699,5708 ****
  static void
  CheckPointGuts(XLogRecPtr checkPointRedo)
  {
      CheckPointCLOG();
      CheckPointSUBTRANS();
      CheckPointMultiXact();
!     FlushBufferPool();            /* performs all required fsyncs */
      /* We deliberately delay 2PC checkpointing as long as possible */
      CheckPointTwoPhase(checkPointRedo);
  }
--- 5701,5722 ----
  static void
  CheckPointGuts(XLogRecPtr checkPointRedo)
  {
+     PGRUsage ru0;
+     int flushed = 0;
+
      CheckPointCLOG();
      CheckPointSUBTRANS();
      CheckPointMultiXact();
!
!     if (trace_checkpoint)
!             pg_rusage_init(&ru0);
!
!     flushed = FlushBufferPool();            /* performs all required fsyncs */
!
!     if (trace_checkpoint)
!             elog(LOG, "CheckPoint: %d/%d buffer(s) flushed. %s",
!                  flushed, NBuffers, pg_rusage_show(&ru0));
!
      /* We deliberately delay 2PC checkpointing as long as possible */
      CheckPointTwoPhase(checkPointRedo);
  }
diff -rc postgresql-snapshot.orig/src/backend/storage/buffer/bufmgr.c
postgresql-snapshot/src/backend/storage/buffer/bufmgr.c
*** postgresql-snapshot.orig/src/backend/storage/buffer/bufmgr.c    Thu May 31 05:11:58 2007
--- postgresql-snapshot/src/backend/storage/buffer/bufmgr.c    Tue Jun 12 17:51:25 2007
***************
*** 1001,1012 ****
   *
   * This is called at checkpoint time to write out all dirty shared buffers.
   */
! void
  BufferSync(void)
  {
      int            buf_id;
      int            num_to_scan;
      int            absorb_counter;

      /*
       * Find out where to start the circular scan.
--- 1001,1013 ----
   *
   * This is called at checkpoint time to write out all dirty shared buffers.
   */
! int
  BufferSync(void)
  {
      int            buf_id;
      int            num_to_scan;
      int            absorb_counter;
+     int            flushed;

      /*
       * Find out where to start the circular scan.
***************
*** 1019,1024 ****
--- 1020,1026 ----
      /*
       * Loop over all buffers.
       */
+     flushed = 0;
      num_to_scan = NBuffers;
      absorb_counter = WRITES_PER_ABSORB;
      while (num_to_scan-- > 0)
***************
*** 1038,1047 ****
--- 1040,1053 ----
                  AbsorbFsyncRequests();
                  absorb_counter = WRITES_PER_ABSORB;
              }
+
+             flushed++;
          }
          if (++buf_id >= NBuffers)
              buf_id = 0;
      }
+
+     return flushed;
  }

  /*
***************
*** 1340,1350 ****
   * Local relations do not participate in checkpoints, so they don't need to be
   * flushed.
   */
! void
  FlushBufferPool(void)
  {
!     BufferSync();
      smgrsync();
  }


--- 1346,1358 ----
   * Local relations do not participate in checkpoints, so they don't need to be
   * flushed.
   */
! int
  FlushBufferPool(void)
  {
!     int flushed = BufferSync();
      smgrsync();
+
+     return flushed;
  }


diff -rc postgresql-snapshot.orig/src/backend/utils/misc/guc.c postgresql-snapshot/src/backend/utils/misc/guc.c
*** postgresql-snapshot.orig/src/backend/utils/misc/guc.c    Sat Jun  9 03:23:52 2007
--- postgresql-snapshot/src/backend/utils/misc/guc.c    Tue Jun 12 18:43:42 2007
***************
*** 109,114 ****
--- 109,117 ----
  #ifdef TRACE_SORT
  extern bool trace_sort;
  #endif
+ #ifdef TRACE_CHECKPOINT
+ extern bool trace_checkpoint;
+ #endif
  #ifdef TRACE_SYNCSCAN
  extern bool trace_syncscan;
  #endif
***************
*** 973,978 ****
--- 976,993 ----
      },
  #endif

+ #ifdef TRACE_CHECKPOINT
+     {
+         {"trace_checkpoint", PGC_SIGHUP, DEVELOPER_OPTIONS,
+             gettext_noop("Emit information about resource usage in checkpointing."),
+             NULL,
+             GUC_NOT_IN_SAMPLE
+         },
+         &trace_checkpoint,
+         false, NULL, NULL
+     },
+ #endif
+
  #ifdef TRACE_SYNCSCAN
      /* this is undocumented because not exposed in a standard build */
      {
diff -rc postgresql-snapshot.orig/src/include/pg_config_manual.h postgresql-snapshot/src/include/pg_config_manual.h
*** postgresql-snapshot.orig/src/include/pg_config_manual.h    Sat Jun  9 03:23:53 2007
--- postgresql-snapshot/src/include/pg_config_manual.h    Tue Jun 12 18:42:56 2007
***************
*** 250,255 ****
--- 250,261 ----
  #define TRACE_SORT 1

  /*
+  * Enable tracing of resource consumption during checkpoint operations;
+  * see also the trace_checkpoint GUC var.  For 8.3 this is enabled by default.
+  */
+ #define TRACE_CHECKPOINT 1
+
+ /*
   * Enable tracing of syncscan operations (see also the trace_syncscan GUC var).
   */
  /* #define TRACE_SYNCSCAN */
diff -rc postgresql-snapshot.orig/src/include/storage/bufmgr.h postgresql-snapshot/src/include/storage/bufmgr.h
*** postgresql-snapshot.orig/src/include/storage/bufmgr.h    Thu May 31 05:12:03 2007
--- postgresql-snapshot/src/include/storage/bufmgr.h    Tue Jun 12 17:51:38 2007
***************
*** 136,142 ****
  extern void ResetBufferUsage(void);
  extern void AtEOXact_Buffers(bool isCommit);
  extern void PrintBufferLeakWarning(Buffer buffer);
! extern void FlushBufferPool(void);
  extern BlockNumber BufferGetBlockNumber(Buffer buffer);
  extern BlockNumber RelationGetNumberOfBlocks(Relation relation);
  extern void RelationTruncate(Relation rel, BlockNumber nblocks);
--- 136,142 ----
  extern void ResetBufferUsage(void);
  extern void AtEOXact_Buffers(bool isCommit);
  extern void PrintBufferLeakWarning(Buffer buffer);
! extern int FlushBufferPool(void);
  extern BlockNumber BufferGetBlockNumber(Buffer buffer);
  extern BlockNumber RelationGetNumberOfBlocks(Relation relation);
  extern void RelationTruncate(Relation rel, BlockNumber nblocks);
***************
*** 161,167 ****
  extern void AbortBufferIO(void);

  extern void BufmgrCommit(void);
! extern void BufferSync(void);
  extern void BgBufferSync(void);

  extern void AtProcExit_LocalBuffers(void);
--- 161,167 ----
  extern void AbortBufferIO(void);

  extern void BufmgrCommit(void);
! extern int BufferSync(void);
  extern void BgBufferSync(void);

  extern void AtProcExit_LocalBuffers(void);

Re: trace_checkpoint parameter patch

From
Alvaro Herrera
Date:
Satoshi Nagayasu wrote:
> Hi all,
>
> Here is a brand new patch to log a checkpointing load information
> to tune the bgwriter parameter.
>
> When you enable trace_checkpoint parameter and process
> the checkpoint, the bgwriter logs the number of
> flushed buffer pages and the elapsed time.

I don't think it needs to be in a TRACE_CHECKPOINT ifdef ... (and even
if it does, I think this would fail to compile with that one disabled).


> -----------------------------------------------------------
> {61}snaga@artemis:~/pgsql-snapshot% ./bin/psql -c 'checkpoint' pgbench
> LOG:  CheckPoint: 1522/3072 buffer(s) flushed. CPU 0.03s/0.00u sec elapsed 1.57 sec
> CHECKPOINT
> {62}snaga@artemis:~/pgsql-snapshot%
> -----------------------------------------------------------
>
> I think this information is useful to configure
> the bgwriter parameter to reduce the performance impact
> of the checkpointing.
>
> Any suggestion or comments?

Well, more I/O numbers would be more interesting than CPU stats ...

--
Alvaro Herrera                 http://www.amazon.com/gp/registry/DXLWNGRJD34J
"Everybody understands Mickey Mouse. Few understand Hermann Hesse.
Hardly anybody understands Einstein. And nobody understands Emperor Norton."

Re: trace_checkpoint parameter patch

From
"Satoshi Nagayasu"
Date:
2007/6/13, Alvaro Herrera <alvherre@commandprompt.com>:
> Well, more I/O numbers would be more interesting than CPU stats ...

Well, I think so too, and I attempted to print block in / out using getrusage(),
but I couldn't get them because they were always zero (on my linux).

I still can't understand the reason.
--
NAGAYASU Satoshi <snaga@snaga.org>

Re: trace_checkpoint parameter patch

From
Greg Smith
Date:
On Tue, 12 Jun 2007, Satoshi Nagayasu wrote:

> Here is a brand new patch to log a checkpointing load information
> to tune the bgwriter parameter.

There is a work in progress patch that logs this and additional checkpoint
information being reviewed in hopes of making it into the 8.3 build.  See
http://archives.postgresql.org/pgsql-patches/2007-05/msg00455.php for the
latest version.

The idea of using pg_rusage_init is a new one though; I hadn't thought the
CPU usage info was interesting enough to figure out how to collect it.
The way the patch mentioned above works it would be hard to squeeze it in
the line usefully for formatting reasons.

> I attempted to print block in / out using getrusage(), but I couldn't
> get them because they were always zero (on my linux).

I don't know what's wrong, but the I/O here is pretty simple:  the
checkpoint wrote some amount of data that you can compute the size of
easily within the code knowing the block size.  That's already done in the
patch under review.

If you're interested in this area, you should check out the
pg_stat_bgwriter feature already in the 8.3 CVS, look through the
pgsql-hackers archives for the discussion this week on the topic
"Controlling Load Distributed Checkpoints", and check out the "Automatic
adjustment of bgwriter_lru_maxpages" patch whose latest version is at
http://archives.postgresql.org/pgsql-patches/2007-05/msg00142.php

--
* Greg Smith gsmith@gregsmith.com http://www.gregsmith.com Baltimore, MD

Re: trace_checkpoint parameter patch

From
Bruce Momjian
Date:
Now that we have log_checkpoints, consider resubmitting this with just
the new information you want to add.  Thanks.

---------------------------------------------------------------------------

Satoshi Nagayasu wrote:
> Hi all,
>
> Here is a brand new patch to log a checkpointing load information
> to tune the bgwriter parameter.
>
> When you enable trace_checkpoint parameter and process
> the checkpoint, the bgwriter logs the number of
> flushed buffer pages and the elapsed time.
>
> -----------------------------------------------------------
> {61}snaga@artemis:~/pgsql-snapshot% ./bin/psql -c 'checkpoint' pgbench
> LOG:  CheckPoint: 1522/3072 buffer(s) flushed. CPU 0.03s/0.00u sec elapsed 1.57 sec
> CHECKPOINT
> {62}snaga@artemis:~/pgsql-snapshot%
> -----------------------------------------------------------
>
> I think this information is useful to configure
> the bgwriter parameter to reduce the performance impact
> of the checkpointing.
>
> Any suggestion or comments?
> --
> NAGAYASU Satoshi <snaga@snaga.org>

> diff -rc postgresql-snapshot.orig/src/backend/access/transam/xlog.c
postgresql-snapshot/src/backend/access/transam/xlog.c
> *** postgresql-snapshot.orig/src/backend/access/transam/xlog.c    Fri Jun  1 00:13:01 2007
> --- postgresql-snapshot/src/backend/access/transam/xlog.c    Tue Jun 12 18:37:47 2007
> ***************
> *** 48,53 ****
> --- 48,54 ----
>   #include "storage/spin.h"
>   #include "utils/builtins.h"
>   #include "utils/pg_locale.h"
> + #include "utils/pg_rusage.h"
>
>
>
> ***************
> *** 66,76 ****
>   char       *XLOG_sync_method = NULL;
>   const char    XLOG_sync_method_default[] = DEFAULT_SYNC_METHOD_STR;
>   bool        fullPageWrites = true;
> -
>   #ifdef WAL_DEBUG
>   bool        XLOG_DEBUG = false;
>   #endif
>
>   /*
>    * XLOGfileslop is used in the code as the allowed "fuzz" in the number of
>    * preallocated XLOG segments --- we try to have at least XLOGfiles advance
> --- 67,78 ----
>   char       *XLOG_sync_method = NULL;
>   const char    XLOG_sync_method_default[] = DEFAULT_SYNC_METHOD_STR;
>   bool        fullPageWrites = true;
>   #ifdef WAL_DEBUG
>   bool        XLOG_DEBUG = false;
>   #endif
>
> + bool        trace_checkpoint = false;
> +
>   /*
>    * XLOGfileslop is used in the code as the allowed "fuzz" in the number of
>    * preallocated XLOG segments --- we try to have at least XLOGfiles advance
> ***************
> *** 5699,5708 ****
>   static void
>   CheckPointGuts(XLogRecPtr checkPointRedo)
>   {
>       CheckPointCLOG();
>       CheckPointSUBTRANS();
>       CheckPointMultiXact();
> !     FlushBufferPool();            /* performs all required fsyncs */
>       /* We deliberately delay 2PC checkpointing as long as possible */
>       CheckPointTwoPhase(checkPointRedo);
>   }
> --- 5701,5722 ----
>   static void
>   CheckPointGuts(XLogRecPtr checkPointRedo)
>   {
> +     PGRUsage ru0;
> +     int flushed = 0;
> +
>       CheckPointCLOG();
>       CheckPointSUBTRANS();
>       CheckPointMultiXact();
> !
> !     if (trace_checkpoint)
> !             pg_rusage_init(&ru0);
> !
> !     flushed = FlushBufferPool();            /* performs all required fsyncs */
> !
> !     if (trace_checkpoint)
> !             elog(LOG, "CheckPoint: %d/%d buffer(s) flushed. %s",
> !                  flushed, NBuffers, pg_rusage_show(&ru0));
> !
>       /* We deliberately delay 2PC checkpointing as long as possible */
>       CheckPointTwoPhase(checkPointRedo);
>   }
> diff -rc postgresql-snapshot.orig/src/backend/storage/buffer/bufmgr.c
postgresql-snapshot/src/backend/storage/buffer/bufmgr.c
> *** postgresql-snapshot.orig/src/backend/storage/buffer/bufmgr.c    Thu May 31 05:11:58 2007
> --- postgresql-snapshot/src/backend/storage/buffer/bufmgr.c    Tue Jun 12 17:51:25 2007
> ***************
> *** 1001,1012 ****
>    *
>    * This is called at checkpoint time to write out all dirty shared buffers.
>    */
> ! void
>   BufferSync(void)
>   {
>       int            buf_id;
>       int            num_to_scan;
>       int            absorb_counter;
>
>       /*
>        * Find out where to start the circular scan.
> --- 1001,1013 ----
>    *
>    * This is called at checkpoint time to write out all dirty shared buffers.
>    */
> ! int
>   BufferSync(void)
>   {
>       int            buf_id;
>       int            num_to_scan;
>       int            absorb_counter;
> +     int            flushed;
>
>       /*
>        * Find out where to start the circular scan.
> ***************
> *** 1019,1024 ****
> --- 1020,1026 ----
>       /*
>        * Loop over all buffers.
>        */
> +     flushed = 0;
>       num_to_scan = NBuffers;
>       absorb_counter = WRITES_PER_ABSORB;
>       while (num_to_scan-- > 0)
> ***************
> *** 1038,1047 ****
> --- 1040,1053 ----
>                   AbsorbFsyncRequests();
>                   absorb_counter = WRITES_PER_ABSORB;
>               }
> +
> +             flushed++;
>           }
>           if (++buf_id >= NBuffers)
>               buf_id = 0;
>       }
> +
> +     return flushed;
>   }
>
>   /*
> ***************
> *** 1340,1350 ****
>    * Local relations do not participate in checkpoints, so they don't need to be
>    * flushed.
>    */
> ! void
>   FlushBufferPool(void)
>   {
> !     BufferSync();
>       smgrsync();
>   }
>
>
> --- 1346,1358 ----
>    * Local relations do not participate in checkpoints, so they don't need to be
>    * flushed.
>    */
> ! int
>   FlushBufferPool(void)
>   {
> !     int flushed = BufferSync();
>       smgrsync();
> +
> +     return flushed;
>   }
>
>
> diff -rc postgresql-snapshot.orig/src/backend/utils/misc/guc.c postgresql-snapshot/src/backend/utils/misc/guc.c
> *** postgresql-snapshot.orig/src/backend/utils/misc/guc.c    Sat Jun  9 03:23:52 2007
> --- postgresql-snapshot/src/backend/utils/misc/guc.c    Tue Jun 12 18:43:42 2007
> ***************
> *** 109,114 ****
> --- 109,117 ----
>   #ifdef TRACE_SORT
>   extern bool trace_sort;
>   #endif
> + #ifdef TRACE_CHECKPOINT
> + extern bool trace_checkpoint;
> + #endif
>   #ifdef TRACE_SYNCSCAN
>   extern bool trace_syncscan;
>   #endif
> ***************
> *** 973,978 ****
> --- 976,993 ----
>       },
>   #endif
>
> + #ifdef TRACE_CHECKPOINT
> +     {
> +         {"trace_checkpoint", PGC_SIGHUP, DEVELOPER_OPTIONS,
> +             gettext_noop("Emit information about resource usage in checkpointing."),
> +             NULL,
> +             GUC_NOT_IN_SAMPLE
> +         },
> +         &trace_checkpoint,
> +         false, NULL, NULL
> +     },
> + #endif
> +
>   #ifdef TRACE_SYNCSCAN
>       /* this is undocumented because not exposed in a standard build */
>       {
> diff -rc postgresql-snapshot.orig/src/include/pg_config_manual.h postgresql-snapshot/src/include/pg_config_manual.h
> *** postgresql-snapshot.orig/src/include/pg_config_manual.h    Sat Jun  9 03:23:53 2007
> --- postgresql-snapshot/src/include/pg_config_manual.h    Tue Jun 12 18:42:56 2007
> ***************
> *** 250,255 ****
> --- 250,261 ----
>   #define TRACE_SORT 1
>
>   /*
> +  * Enable tracing of resource consumption during checkpoint operations;
> +  * see also the trace_checkpoint GUC var.  For 8.3 this is enabled by default.
> +  */
> + #define TRACE_CHECKPOINT 1
> +
> + /*
>    * Enable tracing of syncscan operations (see also the trace_syncscan GUC var).
>    */
>   /* #define TRACE_SYNCSCAN */
> diff -rc postgresql-snapshot.orig/src/include/storage/bufmgr.h postgresql-snapshot/src/include/storage/bufmgr.h
> *** postgresql-snapshot.orig/src/include/storage/bufmgr.h    Thu May 31 05:12:03 2007
> --- postgresql-snapshot/src/include/storage/bufmgr.h    Tue Jun 12 17:51:38 2007
> ***************
> *** 136,142 ****
>   extern void ResetBufferUsage(void);
>   extern void AtEOXact_Buffers(bool isCommit);
>   extern void PrintBufferLeakWarning(Buffer buffer);
> ! extern void FlushBufferPool(void);
>   extern BlockNumber BufferGetBlockNumber(Buffer buffer);
>   extern BlockNumber RelationGetNumberOfBlocks(Relation relation);
>   extern void RelationTruncate(Relation rel, BlockNumber nblocks);
> --- 136,142 ----
>   extern void ResetBufferUsage(void);
>   extern void AtEOXact_Buffers(bool isCommit);
>   extern void PrintBufferLeakWarning(Buffer buffer);
> ! extern int FlushBufferPool(void);
>   extern BlockNumber BufferGetBlockNumber(Buffer buffer);
>   extern BlockNumber RelationGetNumberOfBlocks(Relation relation);
>   extern void RelationTruncate(Relation rel, BlockNumber nblocks);
> ***************
> *** 161,167 ****
>   extern void AbortBufferIO(void);
>
>   extern void BufmgrCommit(void);
> ! extern void BufferSync(void);
>   extern void BgBufferSync(void);
>
>   extern void AtProcExit_LocalBuffers(void);
> --- 161,167 ----
>   extern void AbortBufferIO(void);
>
>   extern void BufmgrCommit(void);
> ! extern int BufferSync(void);
>   extern void BgBufferSync(void);
>
>   extern void AtProcExit_LocalBuffers(void);
>
>
> ---------------------------(end of broadcast)---------------------------
> TIP 7: You can help support the PostgreSQL project by donating at
>
>                 http://www.postgresql.org/about/donate

--
  Bruce Momjian  <bruce@momjian.us>        http://momjian.us
  EnterpriseDB                             http://postgres.enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +