New test_fsync messages for direct I/O - Mailing list pgsql-hackers

From Bruce Momjian
Subject New test_fsync messages for direct I/O
Date
Msg-id 201101160015.p0G0F4113329@momjian.us
Whole thread Raw
In response to Re: Final(?) proposal for wal_sync_method changes  (Josh Berkus <josh@agliodbs.com>)
List pgsql-hackers
Josh Berkus wrote:
> Greg,
>
> > This is interesting, because test_fsync consistently reported a rate of
> > about half this when using open_datasync instead of the equal
> > performance I'm getting from the database.  I'll see if I can reproduce
> > that further, but it's no reason to be concerned about the change that's
> > been made I think.  Just more evidence that test_fsync has quirks left
> > to be sorted out.  But that's not backbranch material, it should be part
> > of 9.1 only refactoring, already in progress via the patch Josh
> > submitted.  There's a bit of time left to get that done.
>
> Did you rerun test_fsync with O_DIRECT enabled, using my patch?  The
> figures you had from test_fsync earlier were without O_DIRECT.

I have applied the attached patch, which makes test_fsync report two
cases: tests that run without O_DIRECT even though the server would only
use O_DIRECT for that sync method, and tests where O_DIRECT fails because
of the file system type.  Josh Berkus wanted the first kind of test kept
in case we decide to offer non-direct-I/O options on machines that
support direct I/O.

The new messages are:

    * This non-direct I/O option is not used by Postgres.

    ** This file system and its mount options do not support direct
    I/O, e.g. ext4 in journaled mode.

You can see the first one below in my output from Ubuntu:

    $ ./test_fsync
    Ops-per-test = 2000

    Simple non-sync'ed write:
            8k write                           58.175 ops/sec

    Compare file sync methods using one write:
    (in wal_sync_method preference order, except fdatasync
    is Linux's default)
            open_datasync                                 n/a
            8k write, fdatasync                68.425 ops/sec
            8k write, fsync                    63.932 ops/sec
            fsync_writethrough                            n/a
            open_sync 8k write*                73.785 ops/sec
            open_sync 8k direct I/O write      82.929 ops/sec
    * This non-direct I/O option is not used by Postgres.

    Compare file sync methods using two writes:
    (in wal_sync_method preference order, except fdatasync
    is Linux's default)
            open_datasync                                 n/a
            8k write, 8k write, fdatasync      42.728 ops/sec
            8k write, 8k write, fsync          43.625 ops/sec
            fsync_writethrough                            n/a
            2 open_sync 8k writes*             37.150 ops/sec
            2 open_sync 8k direct I/O writes   43.722 ops/sec
    * This non-direct I/O option is not used by Postgres.

    Compare open_sync with different sizes:
    (This is designed to compare the cost of one large
    sync'ed write and two smaller sync'ed writes.)
            open_sync 16k write                46.428 ops/sec
            2 open_sync 8k writes              38.703 ops/sec

    Test if fsync on non-write file descriptor is honored:
    (If the times are similar, fsync() can sync data written
    on a different descriptor.)
            8k write, fsync, close             65.744 ops/sec
            8k write, close, fsync             63.077 ops/sec

I believe test_fsync now matches the backend code.  If we decide to
change things, it can be adjusted.

--
  Bruce Momjian  <bruce@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + It's impossible for everything to be true. +
diff --git a/src/tools/fsync/test_fsync.c b/src/tools/fsync/test_fsync.c
index cd2b1f2..562da66 100644
*** /tmp/8wSNdd_test_fsync.c    Sat Jan 15 18:42:58 2011
--- src/tools/fsync/test_fsync.c    Sat Jan 15 18:38:57 2011
*************** void
*** 163,169 ****
  test_sync(int writes_per_op)
  {
      int            tmpfile, ops, writes;
!
      if (writes_per_op == 1)
          printf("\nCompare file sync methods using one write:\n");
      else
--- 163,170 ----
  test_sync(int writes_per_op)
  {
      int            tmpfile, ops, writes;
!     bool        fs_warning = false;
!
      if (writes_per_op == 1)
          printf("\nCompare file sync methods using one write:\n");
      else
*************** test_sync(int writes_per_op)
*** 176,184 ****
       */
  #ifdef OPEN_DATASYNC_FLAG
      if (writes_per_op == 1)
!         printf(LABEL_FORMAT, "open_datasync 8k write");
      else
!          printf(LABEL_FORMAT, "2 open_datasync 8k writes");
      fflush(stdout);

      if ((tmpfile = open(filename, O_RDWR | O_DSYNC, 0)) == -1)
--- 177,193 ----
       */
  #ifdef OPEN_DATASYNC_FLAG
      if (writes_per_op == 1)
!         printf(LABEL_FORMAT, "open_datasync 8k write"
! #if PG_O_DIRECT != 0
!         "*"
! #endif
!         );
      else
!          printf(LABEL_FORMAT, "2 open_datasync 8k writes"
! #if PG_O_DIRECT != 0
!         "*"
! #endif
!         );
      fflush(stdout);

      if ((tmpfile = open(filename, O_RDWR | O_DSYNC, 0)) == -1)
*************** test_sync(int writes_per_op)
*** 201,207 ****
       */
  #if PG_O_DIRECT != 0
      if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT, 0)) == -1)
!         printf(NA_FORMAT, "o_direct", "n/a on this filesystem\n");
      else
      {
          if (writes_per_op == 1)
--- 210,219 ----
       */
  #if PG_O_DIRECT != 0
      if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT, 0)) == -1)
!     {
!         printf(NA_FORMAT, "o_direct", "n/a**\n");
!         fs_warning = true;
!     }
      else
      {
          if (writes_per_op == 1)
*************** test_sync(int writes_per_op)
*** 321,329 ****
   */
  #ifdef OPEN_SYNC_FLAG
      if (writes_per_op == 1)
!         printf(LABEL_FORMAT, "open_sync 8k write");
      else
!         printf(LABEL_FORMAT, "2 open_sync 8k writes");
      fflush(stdout);

      if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG, 0)) == -1)
--- 333,349 ----
   */
  #ifdef OPEN_SYNC_FLAG
      if (writes_per_op == 1)
!         printf(LABEL_FORMAT, "open_sync 8k write"
! #if PG_O_DIRECT != 0
!         "*"
! #endif
!         );
      else
!         printf(LABEL_FORMAT, "2 open_sync 8k writes"
! #if PG_O_DIRECT != 0
!         "*"
! #endif
!         );
      fflush(stdout);

      if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG, 0)) == -1)
*************** test_sync(int writes_per_op)
*** 352,358 ****
      fflush(stdout);

      if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1)
!         printf(NA_FORMAT, "o_direct", "n/a on this filesystem\n");
      else
      {
          gettimeofday(&start_t, NULL);
--- 372,381 ----
      fflush(stdout);

      if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1)
!     {
!         printf(NA_FORMAT, "o_direct", "n/a**\n");
!         fs_warning = true;
!     }
      else
      {
          gettimeofday(&start_t, NULL);
*************** test_sync(int writes_per_op)
*** 375,380 ****
--- 398,414 ----
  #else
      printf(NA_FORMAT, "open_sync", "n/a\n");
  #endif
+
+ #if defined(OPEN_DATASYNC_FLAG) || defined(OPEN_SYNC_FLAG)
+     if (PG_O_DIRECT != 0)
+         printf("* This non-direct I/O option is not used by Postgres.\n");
+ #endif
+
+     if (fs_warning)
+     {
+         printf("** This file system and its mount options do not support direct\n");
+         printf("I/O, e.g. ext4 in journaled mode.\n");
+     }
  }

  void
*************** test_open_syncs(void)
*** 389,394 ****
--- 423,430 ----
      printf("(This is designed to compare the cost of one large\n");
      printf("sync'ed write and two smaller sync'ed writes.)\n");

+     /* XXX no PG_O_DIRECT */
+
  /*
   * Test open_sync with different size files
   */

pgsql-hackers by date:

Previous
From: "Kevin Grittner"
Date:
Subject: .gitignore file needed for new replication parser
Next
From: Bruce Momjian
Date:
Subject: Re: What happened to open_sync_without_odirect?