Thread: Patch for Re: [HACKERS] Caching number of blocks in relation to avoid lseek.

Patch for Re: [HACKERS] Caching number of blocks in relation to avoid lseek.

From
Denis Perchine
Date:
> Oh.  Hmm.  Not sure if it's really worth the trouble, but you could try
> having fd.c keep track of the current seek position of VFDs when they
> are open as well as when they are closed, and optimize away the lseek
> call in FileSeek if the position is already right.  You'd have to think
> carefully about what to do if a read or write fails, however --- where
> has the kernel left its seek position in that case?  Possibly this could
> be dealt with by setting the internal position to "unknown" anytime
> we're not perfectly sure where the kernel is.

If a read or write fails, the position will be left the same. This situation is already
tracked in the File routines, but slightly incorrectly.

Here is the full patch for this. This patch reduces the number of lseek calls tenfold
for an update statement and twentyfold for a select statement. I tested a joined update
and a count(*) select on a table with more than 170000 rows and 10 indices.
I think this is worth trying. Before, lseek calls accounted for more than 5% of the time.
Now they are 0.89 and 0.15 respectively.

Since only one file is modified, the patch should be applied in the src/backend/storage/file/ directory.

--
Sincerely Yours,
Denis Perchine

----------------------------------
E-Mail: dyp@perchine.com
HomePage: http://www.perchine.com/dyp/
FidoNet: 2:5000/120.5
----------------------------------

Attachment
> If a read or write fails, the position will be left the same. This situation is already
> tracked in the File routines, but slightly incorrectly.

After a small survey of the Linux kernel code, I am not sure about it.
The new patch sets the position to unknown when a read or write fails, and
does the lseek again.

> Here is the full patch for this. This patch reduces the number of lseek calls tenfold
> for an update statement and twentyfold for a select statement. I tested a joined update
> and a count(*) select on a table with more than 170000 rows and 10 indices.
> I think this is worth trying. Before, lseek calls accounted for more than 5% of the time.
> Now they are 0.89 and 0.15 respectively.
>
> Since only one file is modified, the patch should be applied in the src/backend/storage/file/ directory.

--
Sincerely Yours,
Denis Perchine

----------------------------------
E-Mail: dyp@perchine.com
HomePage: http://www.perchine.com/dyp/
FidoNet: 2:5000/120.5
----------------------------------

Attachment
Yoo, hoo.  No one complained about this patch, so in it goes.  I bet
this will be a real speedup on some platforms.  I know some OS's don't
do fseek() as fast as we would like.

Thanks.

> > If a read or write fails, the position will be left the same. This situation is already
> > tracked in the File routines, but slightly incorrectly.
>
> After a small survey of the Linux kernel code, I am not sure about it.
> The new patch sets the position to unknown when a read or write fails, and
> does the lseek again.
>
> > Here is the full patch for this. This patch reduces the number of lseek calls tenfold
> > for an update statement and twentyfold for a select statement. I tested a joined update
> > and a count(*) select on a table with more than 170000 rows and 10 indices.
> > I think this is worth trying. Before, lseek calls accounted for more than 5% of the time.
> > Now they are 0.89 and 0.15 respectively.
> >
> > Since only one file is modified, the patch should be applied in the src/backend/storage/file/ directory.
>
> --
> Sincerely Yours,
> Denis Perchine
>
> ----------------------------------
> E-Mail: dyp@perchine.com
> HomePage: http://www.perchine.com/dyp/
> FidoNet: 2:5000/120.5
> ----------------------------------

[ Attachment, skipping... ]

Index: fd.c
===================================================================
RCS file: /home/projects/pgsql/cvsroot/pgsql/src/backend/storage/file/fd.c,v
retrieving revision 1.59
diff -c -b -w -r1.59 fd.c
*** fd.c    2000/06/02 15:57:24    1.59
--- fd.c    2000/06/13 08:34:55
***************
*** 95,100 ****
--- 95,102 ----

  #define FileIsNotOpen(file) (VfdCache[file].fd == VFD_CLOSED)

+ #define FileUnknownPos (-1)
+
  typedef struct vfd
  {
      signed short fd;            /* current FD, or VFD_CLOSED if none */
***************
*** 790,795 ****
--- 792,799 ----
      returnCode = read(VfdCache[file].fd, buffer, amount);
      if (returnCode > 0)
          VfdCache[file].seekPos += returnCode;
+     else
+         VfdCache[file].seekPos = FileUnknownPos;

      return returnCode;
  }
***************
*** 806,816 ****

      FileAccess(file);
      returnCode = write(VfdCache[file].fd, buffer, amount);
!     if (returnCode > 0)
          VfdCache[file].seekPos += returnCode;
-
      /* mark the file as needing fsync */
      VfdCache[file].fdstate |= FD_DIRTY;

      return returnCode;
  }
--- 810,821 ----

      FileAccess(file);
      returnCode = write(VfdCache[file].fd, buffer, amount);
!     if (returnCode > 0) {
          VfdCache[file].seekPos += returnCode;
          /* mark the file as needing fsync */
          VfdCache[file].fdstate |= FD_DIRTY;
+     } else
+         VfdCache[file].seekPos = FileUnknownPos;

      return returnCode;
  }
***************
*** 840,849 ****
              default:
                  elog(ERROR, "FileSeek: invalid whence: %d", whence);
                  break;
-         }
      }
!     else
          VfdCache[file].seekPos = lseek(VfdCache[file].fd, offset, whence);
      return VfdCache[file].seekPos;
  }

--- 845,870 ----
              default:
                  elog(ERROR, "FileSeek: invalid whence: %d", whence);
                  break;
          }
!     } else
!         switch (whence) {
!             case SEEK_SET:
!                 if (offset < 0)
!                     elog(ERROR, "FileSeek: invalid offset: %ld", offset);
!                 if (VfdCache[file].seekPos != offset)
!                     VfdCache[file].seekPos = lseek(VfdCache[file].fd, offset, whence);
!                 break;
!             case SEEK_CUR:
!                 if ((offset != 0) || (VfdCache[file].seekPos == FileUnknownPos));
                      VfdCache[file].seekPos = lseek(VfdCache[file].fd, offset, whence);
+                 break;
+             case SEEK_END:
+                 VfdCache[file].seekPos = lseek(VfdCache[file].fd, offset, whence);
+                 break;
+             default:
+                 elog(ERROR, "FileSeek: invalid whence: %d", whence);
+                 break;
+         }
      return VfdCache[file].seekPos;
  }


--
  Bruce Momjian                        |  http://www.op.net/~candle
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026