Re: reducing statistics write overhead - Mailing list pgsql-hackers

From Martin Pihlak
Subject Re: reducing statistics write overhead
Date
Msg-id 48C527C0.2070707@gmail.com
Whole thread Raw
In response to Re: reducing statistics write overhead  (Tom Lane <tgl@sss.pgh.pa.us>)
Responses Re: reducing statistics write overhead
List pgsql-hackers
Tom Lane wrote:
> Timestamp within the file is certainly better than trying to rely on
> filesystem timestamps.  I doubt 1 sec resolution is good enough, and
> I'd also be worried about issues like clock skew between the
> postmaster's time and the filesystem's time.
>

Attached is a patch which adds a timestamp to pgstat.stat file header,
backend_read_statsfile uses this to determine if the file is fresh.
During the wait loop, the stats request message is retransmitted to
compensate for possible loss of message(s).

The collector only writes the file mostly at PGSTAT_STAT_INTERVAL frequency,
currently no extra custom timeouts can be passed in the message. This can
of course be added if need arises.

regards,
Martin


Index: src/backend/postmaster/pgstat.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/postmaster/pgstat.c,v
retrieving revision 1.181
diff -c -r1.181 pgstat.c
*** src/backend/postmaster/pgstat.c    25 Aug 2008 18:55:43 -0000    1.181
--- src/backend/postmaster/pgstat.c    8 Sep 2008 12:17:21 -0000
***************
*** 85,90 ****
--- 85,94 ----
  #define PGSTAT_SELECT_TIMEOUT    2        /* How often to check for postmaster
                                           * death; in seconds. */

+ #define PGSTAT_POLL_RETRY_DELAY    10        /* How long to pause between statistics
+                                          * update requests; in milliseconds. */
+
+ #define PGSTAT_POLL_RETRIES        500        /* How many times to repeat stats inquiry */

  /* ----------
   * The initial size hints for the hash tables used in the collector.
***************
*** 201,206 ****
--- 205,215 ----
   */
  static PgStat_GlobalStats globalStats;

+ /* Last time the collector wrote the stats file */
+ static TimestampTz last_statwrite;
+ /* Last time a backend requested a new file */
+ static TimestampTz last_statrequest;
+
  static volatile bool need_exit = false;
  static volatile bool need_statwrite = false;
  static volatile bool got_SIGHUP = false;
***************
*** 223,229 ****

  NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]);
  static void pgstat_exit(SIGNAL_ARGS);
- static void force_statwrite(SIGNAL_ARGS);
  static void pgstat_beshutdown_hook(int code, Datum arg);
  static void pgstat_sighup_handler(SIGNAL_ARGS);

--- 232,237 ----
***************
*** 254,259 ****
--- 262,268 ----
  static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
  static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len);
  static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
+ static void pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len);


  /* ------------------------------------------------------------
***************
*** 2463,2468 ****
--- 2472,2491 ----
      hdr->m_type = mtype;
  }

+ /* ----------
+  * pgstat_send_inquiry() -
+  *
+  *        Notify collector that we need fresh data.
+  * ----------
+  */
+ static void
+ pgstat_send_inquiry(void)
+ {
+     PgStat_MsgInquiry msg;
+
+     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_INQUIRY);
+     pgstat_send(&msg, sizeof(msg));
+ }

  /* ----------
   * pgstat_send() -
***************
*** 2538,2545 ****
  NON_EXEC_STATIC void
  PgstatCollectorMain(int argc, char *argv[])
  {
-     struct itimerval write_timeout;
-     bool        need_timer = false;
      int            len;
      PgStat_Msg    msg;

--- 2561,2566 ----
***************
*** 2571,2583 ****

      /*
       * Ignore all signals usually bound to some action in the postmaster,
!      * except SIGQUIT and SIGALRM.
       */
      pqsignal(SIGHUP, pgstat_sighup_handler);
      pqsignal(SIGINT, SIG_IGN);
      pqsignal(SIGTERM, SIG_IGN);
      pqsignal(SIGQUIT, pgstat_exit);
-     pqsignal(SIGALRM, force_statwrite);
      pqsignal(SIGPIPE, SIG_IGN);
      pqsignal(SIGUSR1, SIG_IGN);
      pqsignal(SIGUSR2, SIG_IGN);
--- 2592,2603 ----

      /*
       * Ignore all signals usually bound to some action in the postmaster,
!      * except SIGQUIT
       */
      pqsignal(SIGHUP, pgstat_sighup_handler);
      pqsignal(SIGINT, SIG_IGN);
      pqsignal(SIGTERM, SIG_IGN);
      pqsignal(SIGQUIT, pgstat_exit);
      pqsignal(SIGPIPE, SIG_IGN);
      pqsignal(SIGUSR1, SIG_IGN);
      pqsignal(SIGUSR2, SIG_IGN);
***************
*** 2598,2608 ****
       */
      need_statwrite = true;

-     /* Preset the delay between status file writes */
-     MemSet(&write_timeout, 0, sizeof(struct itimerval));
-     write_timeout.it_value.tv_sec = PGSTAT_STAT_INTERVAL / 1000;
-     write_timeout.it_value.tv_usec = (PGSTAT_STAT_INTERVAL % 1000) * 1000;
-
      /*
       * Read in an existing statistics stats file or initialize the stats to
       * zero.
--- 2618,2623 ----
***************
*** 2649,2668 ****
          }

          /*
!          * If time to write the stats file, do so.    Note that the alarm
!          * interrupt isn't re-enabled immediately, but only after we next
!          * receive a stats message; so no cycles are wasted when there is
!          * nothing going on.
           */
          if (need_statwrite)
          {
              /* Check for postmaster death; if so we'll write file below */
              if (!PostmasterIsAlive(true))
                  break;

!             pgstat_write_statsfile(false);
              need_statwrite = false;
-             need_timer = true;
          }

          /*
--- 2664,2687 ----
          }

          /*
!          * If time to write the stats file, do so.
           */
          if (need_statwrite)
          {
+             TimestampTz now = GetCurrentTimestamp();
+
              /* Check for postmaster death; if so we'll write file below */
              if (!PostmasterIsAlive(true))
                  break;

!             /* Write the file, but only if enough time has passed since last write */
!             if (TimestampDifferenceExceeds(last_statwrite, now, PGSTAT_STAT_INTERVAL))
!             {
!                 pgstat_write_statsfile(false);
!                 last_statwrite = now;
!             }
!
              need_statwrite = false;
          }

          /*
***************
*** 2797,2817 ****
                      pgstat_recv_funcpurge((PgStat_MsgFuncpurge *) &msg, len);
                      break;

!                 default:
                      break;
-             }

!             /*
!              * If this is the first message after we wrote the stats file the
!              * last time, enable the alarm interrupt to make it be written
!              * again later.
!              */
!             if (need_timer)
!             {
!                 if (setitimer(ITIMER_REAL, &write_timeout, NULL))
!                     ereport(ERROR,
!                     (errmsg("could not set statistics collector timer: %m")));
!                 need_timer = false;
              }
          }
          else
--- 2816,2827 ----
                      pgstat_recv_funcpurge((PgStat_MsgFuncpurge *) &msg, len);
                      break;

!                 case PGSTAT_MTYPE_INQUIRY:
!                     pgstat_recv_inquiry((PgStat_MsgInquiry *)&msg, len);
                      break;

!                 default:
!                     break;
              }
          }
          else
***************
*** 2841,2853 ****
      need_exit = true;
  }

- /* SIGALRM signal handler for collector process */
- static void
- force_statwrite(SIGNAL_ARGS)
- {
-     need_statwrite = true;
- }
-
  /* SIGHUP handler for collector process */
  static void
  pgstat_sighup_handler(SIGNAL_ARGS)
--- 2851,2856 ----
***************
*** 2937,2949 ****
      PgStat_StatFuncEntry *funcentry;
      FILE       *fpout;
      int32        format_id;
      const char *tmpfile = permanent?PGSTAT_STAT_PERMANENT_TMPFILE:pgstat_stat_tmpname;
      const char *statfile = permanent?PGSTAT_STAT_PERMANENT_FILENAME:pgstat_stat_filename;

      /*
       * Open the statistics temp file to write out the current values.
       */
!     fpout = fopen(tmpfile, PG_BINARY_W);
      if (fpout == NULL)
      {
          ereport(LOG,
--- 2940,2953 ----
      PgStat_StatFuncEntry *funcentry;
      FILE       *fpout;
      int32        format_id;
+     TimestampTz    ts;
      const char *tmpfile = permanent?PGSTAT_STAT_PERMANENT_TMPFILE:pgstat_stat_tmpname;
      const char *statfile = permanent?PGSTAT_STAT_PERMANENT_FILENAME:pgstat_stat_filename;

      /*
       * Open the statistics temp file to write out the current values.
       */
!     fpout = AllocateFile(tmpfile, PG_BINARY_W);
      if (fpout == NULL)
      {
          ereport(LOG,
***************
*** 2954,2963 ****
      }

      /*
!      * Write the file header --- currently just a format ID.
       */
      format_id = PGSTAT_FILE_FORMAT_ID;
      fwrite(&format_id, sizeof(format_id), 1, fpout);

      /*
       * Write global stats struct
--- 2958,2969 ----
      }

      /*
!      * Write the file header --- currently just a format ID and a timestamp.
       */
      format_id = PGSTAT_FILE_FORMAT_ID;
+     ts = GetCurrentTimestamp();
      fwrite(&format_id, sizeof(format_id), 1, fpout);
+     fwrite(&ts, sizeof(ts), 1, fpout);

      /*
       * Write global stats struct
***************
*** 3017,3026 ****
                  (errcode_for_file_access(),
                 errmsg("could not write temporary statistics file \"%s\": %m",
                        tmpfile)));
!         fclose(fpout);
          unlink(tmpfile);
      }
!     else if (fclose(fpout) < 0)
      {
          ereport(LOG,
                  (errcode_for_file_access(),
--- 3023,3032 ----
                  (errcode_for_file_access(),
                 errmsg("could not write temporary statistics file \"%s\": %m",
                        tmpfile)));
!         FreeFile(fpout);
          unlink(tmpfile);
      }
!     else if (FreeFile(fpout) < 0)
      {
          ereport(LOG,
                  (errcode_for_file_access(),
***************
*** 3108,3113 ****
--- 3114,3127 ----
          goto done;
      }

+     /* Skip over the timestamp */
+     if (fseek(fpin, sizeof(TimestampTz), SEEK_CUR) != 0)
+     {
+         ereport(pgStatRunningInCollector ? LOG : WARNING,
+                 (errmsg("corrupted pgstat.stat file")));
+         goto done;
+     }
+
      /*
       * Read global stats struct
       */
***************
*** 3297,3307 ****
--- 3311,3365 ----
  static void
  backend_read_statsfile(void)
  {
+     TimestampTz start;
+     bool stats_ready = false;
+     int count;
+
      /* already read it? */
      if (pgStatDBHash)
          return;
      Assert(!pgStatRunningInCollector);

+     start = GetCurrentTimestamp();
+
+     /*
+      * If the stats requests are sent too frequently the collector
+      * might refuse to write the file and cause a longish wait here.
+      * So we reuse the old file if we know that not enough time has
+      * passsed since last request.
+      */
+     if (!TimestampDifferenceExceeds(last_statrequest, start, PGSTAT_STAT_INTERVAL))
+         stats_ready = true;
+     last_statrequest = start;
+
+     /*
+      * Loop until fresh stats file is available or we ran out of time.
+      * The stats inquiry message is resent in case collector drops our
+      * first message.
+      */
+     for (count = 0; !stats_ready && count < PGSTAT_POLL_RETRIES; count++)
+     {
+         TimestampTz ts;
+         FILE *fp;
+
+         CHECK_FOR_INTERRUPTS();
+         pgstat_send_inquiry();
+         pg_usleep(PGSTAT_POLL_RETRY_DELAY * 1000);
+
+         /* Read the timestamp from beginning of the stats file */
+         if ((fp = AllocateFile(pgstat_stat_filename, PG_BINARY_R)) != NULL)
+         {
+             fseek(fp, sizeof(int32), SEEK_SET);
+             fread(&ts, sizeof(ts), 1, fp);
+             if (!ferror(fp) && TimestampDifferenceExceeds(start, ts, 0))
+                 stats_ready = true;
+             FreeFile(fp);
+         }
+     }
+
+     if (count >= PGSTAT_POLL_RETRIES)
+         elog(WARNING, "pgstat wait timeout");
+
      /* Autovacuum launcher wants stats about all databases */
      if (IsAutoVacuumLauncherProcess())
          pgStatDBHash = pgstat_read_statsfile(InvalidOid, false);
***************
*** 3779,3781 ****
--- 3837,3852 ----
                             HASH_REMOVE, NULL);
      }
  }
+
+ /* ----------
+  * pgstat_recv_inquiry() -
+  *
+  *    Process stat inquiry requests.
+  * ----------
+  */
+ static void
+ pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len)
+ {
+     need_statwrite = true;
+ }
+
Index: src/include/pgstat.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/pgstat.h,v
retrieving revision 1.78
diff -c -r1.78 pgstat.h
*** src/include/pgstat.h    15 Aug 2008 08:37:40 -0000    1.78
--- src/include/pgstat.h    8 Sep 2008 12:17:23 -0000
***************
*** 42,48 ****
      PGSTAT_MTYPE_ANALYZE,
      PGSTAT_MTYPE_BGWRITER,
      PGSTAT_MTYPE_FUNCSTAT,
!     PGSTAT_MTYPE_FUNCPURGE
  } StatMsgType;

  /* ----------
--- 42,49 ----
      PGSTAT_MTYPE_ANALYZE,
      PGSTAT_MTYPE_BGWRITER,
      PGSTAT_MTYPE_FUNCSTAT,
!     PGSTAT_MTYPE_FUNCPURGE,
!     PGSTAT_MTYPE_INQUIRY
  } StatMsgType;

  /* ----------
***************
*** 385,390 ****
--- 386,403 ----


  /* ----------
+  * PgStat_MsgInquiry            Sent by the backend to tell the collector
+  *                                to write the stats file.
+  * ----------
+  */
+
+ typedef struct PgStat_MsgInquiry
+ {
+     PgStat_MsgHdr m_hdr;
+ } PgStat_MsgInquiry;
+
+
+ /* ----------
   * PgStat_Msg                    Union over all possible messages.
   * ----------
   */
***************
*** 402,407 ****
--- 415,421 ----
      PgStat_MsgBgWriter msg_bgwriter;
      PgStat_MsgFuncstat msg_funcstat;
      PgStat_MsgFuncpurge msg_funcpurge;
+     PgStat_MsgInquiry msg_inquiry;
  } PgStat_Msg;


***************
*** 413,419 ****
   * ------------------------------------------------------------
   */

! #define PGSTAT_FILE_FORMAT_ID    0x01A5BC97

  /* ----------
   * PgStat_StatDBEntry            The collector's data per database
--- 427,433 ----
   * ------------------------------------------------------------
   */

! #define PGSTAT_FILE_FORMAT_ID    0x01A5BC98

  /* ----------
   * PgStat_StatDBEntry            The collector's data per database

pgsql-hackers by date:

Previous
From: Alvaro Herrera
Date:
Subject: Re: Some newbie questions
Next
From: "Greg Stark"
Date:
Subject: Re: [PATCH] Cleanup of GUC units code