performance: use pread instead of lseek+read - Mailing list pgsql-patches

From Manfred Spraul
Subject performance: use pread instead of lseek+read
Date
Msg-id 3E5A6349.4060900@colorfullife.com
Whole thread Raw
Responses Re: performance: use pread instead of lseek+read
List pgsql-patches
Hi all,

postgresql tries very hard to avoid calling lseek if not needed,
probably to avoid doing unnecessary syscalls.
What about removing lseek entirely and using the p{read,write}?

pread is identical to the normal read syscall, except that it has one
additional parameter: the position from which the data should be read.
All recent unices support that, it's part of POSIX.1c.

Attached is a patch vs the cvs tree.
It seems to work - 7.3.2 with the patch applied passes the regression
test suite on RH Linux.
Untested with cvs-head, preproc.y causes a parser overflow.

What do you think?
- configure: I test for existance of pread, and assume that pwrite will
exist, too. Acceptable?
- Are you interested in further patches? xlog.c contains a few lseeks,
but I doubt that they are in the critical path.

--
   Manfred
diff -u -u -r -x configure pgsql.orig/configure.in pgsql/configure.in
--- pgsql.orig/configure.in    2003-02-19 05:04:04.000000000 +0100
+++ pgsql/configure.in    2003-02-23 09:38:46.000000000 +0100
@@ -786,7 +786,7 @@
 # SunOS doesn't handle negative byte comparisons properly with +/- return
 AC_FUNC_MEMCMP

-AC_CHECK_FUNCS([cbrt fcvt getpeereid memmove pstat setproctitle setsid sigprocmask sysconf waitpid dlopen fdatasync
utimeutimes]) 
+AC_CHECK_FUNCS([cbrt fcvt getpeereid memmove pstat setproctitle setsid sigprocmask sysconf waitpid dlopen fdatasync
utimeutimes pread]) 

 AC_CHECK_DECLS(fdatasync, [], [], [#include <unistd.h>])

diff -u -u -r -x configure pgsql.orig/src/backend/storage/file/fd.c pgsql/src/backend/storage/file/fd.c
--- pgsql.orig/src/backend/storage/file/fd.c    2002-09-02 08:11:42.000000000 +0200
+++ pgsql/src/backend/storage/file/fd.c    2003-02-23 09:45:41.000000000 +0100
@@ -391,7 +391,9 @@
     Delete(file);

     /* save the seek position */
+#ifndef HAVE_PREAD
     vfdP->seekPos = (long) lseek(vfdP->fd, 0L, SEEK_CUR);
+#endif
     Assert(vfdP->seekPos != -1L);

     /* close the file */
@@ -462,6 +464,7 @@
             ++nfile;
         }

+#ifndef HAVE_PREAD
         /* seek to the right position */
         if (vfdP->seekPos != 0L)
         {
@@ -470,6 +473,7 @@
             returnValue = (long) lseek(vfdP->fd, vfdP->seekPos, SEEK_SET);
             Assert(returnValue != -1L);
         }
+#endif
     }

     /*
@@ -877,11 +881,17 @@
                VfdCache[file].seekPos, amount, buffer));

     FileAccess(file);
+#if HAVE_PREAD
+    returnCode = pread(VfdCache[file].fd, buffer, amount, VfdCache[file].seekPos);
+    if (returnCode > 0)
+        VfdCache[file].seekPos += returnCode;
+#else
     returnCode = read(VfdCache[file].fd, buffer, amount);
     if (returnCode > 0)
         VfdCache[file].seekPos += returnCode;
     else
         VfdCache[file].seekPos = FileUnknownPos;
+#endif

     return returnCode;
 }
@@ -900,16 +910,25 @@
     FileAccess(file);

     errno = 0;
+#if HAVE_PREAD
+    returnCode = pwrite(VfdCache[file].fd, buffer, amount, VfdCache[file].seekPos);
+#else
     returnCode = write(VfdCache[file].fd, buffer, amount);
+#endif

     /* if write didn't set errno, assume problem is no disk space */
     if (returnCode != amount && errno == 0)
         errno = ENOSPC;

+#if HAVE_PREAD
+    if (returnCode > 0)
+        VfdCache[file].seekPos += returnCode;
+#else
     if (returnCode > 0)
         VfdCache[file].seekPos += returnCode;
     else
         VfdCache[file].seekPos = FileUnknownPos;
+#endif

     return returnCode;
 }
@@ -951,12 +970,20 @@
             case SEEK_SET:
                 if (offset < 0)
                     elog(ERROR, "FileSeek: invalid offset: %ld", offset);
+#ifdef HAVE_PREAD
+                VfdCache[file].seekPos = offset;
+#else
                 if (VfdCache[file].seekPos != offset)
                     VfdCache[file].seekPos = lseek(VfdCache[file].fd, offset, whence);
+#endif
                 break;
             case SEEK_CUR:
+#ifdef HAVE_PREAD
+                VfdCache[file].seekPos += offset;
+#else
                 if (offset != 0 || VfdCache[file].seekPos == FileUnknownPos)
                     VfdCache[file].seekPos = lseek(VfdCache[file].fd, offset, whence);
+#endif
                 break;
             case SEEK_END:
                 VfdCache[file].seekPos = lseek(VfdCache[file].fd, offset, whence);
diff -u -u -r -x configure pgsql.orig/src/include/pg_config.h.in pgsql/src/include/pg_config.h.in
--- pgsql.orig/src/include/pg_config.h.in    2003-02-19 05:04:04.000000000 +0100
+++ pgsql/src/include/pg_config.h.in    2003-02-23 09:59:47.000000000 +0100
@@ -544,6 +544,9 @@
 /* Define if the standard header unistd.h declares fdatasync() */
 #undef HAVE_DECL_FDATASYNC

+/* Define if you have pread(). Implies pwrite, too. */
+#undef HAVE_PREAD
+
 /* Set to 1 if you have libz.a */
 #undef HAVE_LIBZ


pgsql-patches by date:

Previous
From: Tom Lane
Date:
Subject: Re: psql patch
Next
From: Tom Lane
Date:
Subject: Re: psql patch