A patch for xlog.c - Mailing list pgsql-patches

From Matthew Kirkwood
Subject A patch for xlog.c
Date
Msg-id Pine.LNX.4.10.10102241536450.12959-100000@sphinx.mythic-beasts.com
Whole thread Raw
Responses Re: A patch for xlog.c
Re: A patch for xlog.c
List pgsql-patches
Hi,

Here is a patch against 7.1beta5 that uses mmap() of /dev/zero,
and thus a single write(), to initialise new xlog segments.  It
may well improve the performance of segment creation on
platforms/filesystems which write metadata synchronously.

It still needs a configure test to define _HAVE_MMAP, but it
certainly builds and runs OK.

It also wraps the closing and reopening of the file in an
"ifdef WIN32", since that certainly isn't needed on UNIX-like
platforms (which I assume include BeOS).

Matthew.


diff -ruN postgresql-7.1beta5-clean/src/backend/access/transam/xlog.c
postgresql-7.1beta5/src/backend/access/transam/xlog.c
--- postgresql-7.1beta5-clean/src/backend/access/transam/xlog.c    Fri Feb 23 18:12:00 2001
+++ postgresql-7.1beta5/src/backend/access/transam/xlog.c    Sat Feb 24 15:23:41 2001
@@ -24,6 +24,10 @@
 #include <locale.h>
 #endif

+#ifdef    _HAVE_MMAP
+#include <sys/mman.h>
+#endif
+
 #include "access/transam.h"
 #include "access/xact.h"
 #include "catalog/catversion.h"
@@ -36,6 +40,7 @@
 #include "access/xlogutils.h"
 #include "utils/builtins.h"
 #include "utils/relcache.h"
+#include "utils/pfile.h"

 #include "miscadmin.h"

@@ -53,6 +58,10 @@
 StartUpID    ThisStartUpID = 0;
 XLogRecPtr    RedoRecPtr;

+#ifdef    _HAVE_MMAP
+static void *zmmap = NULL;    /* /dev/zero mapping set by ZeroMapInit(); NULL until mapped */
+#endif
+
 int            XLOG_DEBUG = 0;

 /* To read/update control file and create new log file */
@@ -955,7 +964,6 @@
 {
     char        path[MAXPGPATH];
     char        tpath[MAXPGPATH];
-    char        zbuffer[BLCKSZ];
     int            fd;
     int            nbytes;

@@ -987,28 +995,36 @@
         elog(STOP, "InitCreate(logfile %u seg %u) failed: %m",
              logId, logSeg);

-    /*
-     * Zero-fill the file.  We have to do this the hard way to ensure that
-     * all the file space has really been allocated --- on platforms that
-     * allow "holes" in files, just seeking to the end doesn't allocate
-     * intermediate space.  This way, we know that we have all the space
-     * and (after the fsync below) that all the indirect blocks are down
-     * on disk.  Therefore, fdatasync(2) will be sufficient to sync future
-     * writes to the log file.
-     */
-    MemSet(zbuffer, 0, sizeof(zbuffer));
-    for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(zbuffer))
+#ifdef    _HAVE_MMAP
+    if (!zmmap || (write(fd, zmmap, XLogSegSize) != XLogSegSize))
+#endif
     {
-        if ((int) write(fd, zbuffer, sizeof(zbuffer)) != (int) sizeof(zbuffer))
-            elog(STOP, "ZeroFill(logfile %u seg %u) failed: %m",
-                 logId, logSeg);
+        /*
+         * Zero-fill the file.  We have to do this the hard way to ensure that
+         * all the file space has really been allocated --- on platforms that
+         * allow "holes" in files, just seeking to the end doesn't allocate
+         * intermediate space.  This way, we know that we have all the space
+         * and (after the fsync below) that all the indirect blocks are down
+         * on disk.  Therefore, fdatasync(2) will be sufficient to sync future
+         * writes to the log file.
+         */
+        char        zbuffer[BLCKSZ];
+        MemSet(zbuffer, 0, sizeof(zbuffer));
+        for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(zbuffer))
+        {
+            if ((int) write(fd, zbuffer, sizeof(zbuffer)) != (int) sizeof(zbuffer))
+                elog(STOP, "ZeroFill(logfile %u seg %u) failed: %m",
+                     logId, logSeg);
+        }
     }

     if (pg_fsync(fd) != 0)
         elog(STOP, "fsync(logfile %u seg %u) failed: %m",
              logId, logSeg);

+#ifdef    WIN32
     close(fd);
+#endif

     /*
      * Prefer link() to rename() here just to be sure that we don't overwrite
@@ -1026,10 +1042,12 @@
              logId, logSeg);
 #endif

+#ifdef    WIN32
     fd = BasicOpenFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
     if (fd < 0)
         elog(STOP, "InitReopen(logfile %u seg %u) failed: %m",
              logId, logSeg);
+#endif

     return (fd);
 }
@@ -1255,11 +1273,8 @@
     if (noBlck || readOff != (RecPtr->xrecoff % XLogSegSize) / BLCKSZ)
     {
         readOff = (RecPtr->xrecoff % XLogSegSize) / BLCKSZ;
-        if (lseek(readFile, (off_t) (readOff * BLCKSZ), SEEK_SET) < 0)
-            elog(STOP, "ReadRecord: lseek(logfile %u seg %u off %u) failed: %m",
-                 readId, readSeg, readOff);
-        if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
-            elog(STOP, "ReadRecord: read(logfile %u seg %u off %u) failed: %m",
+        if (pg_pread(readFile, readBuf, BLCKSZ, (readOff * BLCKSZ)) != BLCKSZ)
+            elog(STOP, "ReadRecord: pg_pread(logfile %u seg %u off %u) failed: %m",
                  readId, readSeg, readOff);
         if (((XLogPageHeader) readBuf)->xlp_magic != XLOG_PAGE_MAGIC)
         {
@@ -1415,19 +1430,13 @@
         elog(LOG, "Formatting logfile %u seg %u block %u at offset %u",
              readId, readSeg, readOff, EndRecPtr.xrecoff % BLCKSZ);
         readFile = XLogFileOpen(readId, readSeg, false);
-        if (lseek(readFile, (off_t) (readOff * BLCKSZ), SEEK_SET) < 0)
-            elog(STOP, "ReadRecord: lseek(logfile %u seg %u off %u) failed: %m",
-                 readId, readSeg, readOff);
-        if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
-            elog(STOP, "ReadRecord: read(logfile %u seg %u off %u) failed: %m",
+        if (pg_pread(readFile, readBuf, BLCKSZ, (readOff * BLCKSZ)) != BLCKSZ)
+            elog(STOP, "ReadRecord: pg_pread(logfile %u seg %u off %u) failed: %m",
                  readId, readSeg, readOff);
         memset(readBuf + EndRecPtr.xrecoff % BLCKSZ, 0,
                BLCKSZ - EndRecPtr.xrecoff % BLCKSZ);
-        if (lseek(readFile, (off_t) (readOff * BLCKSZ), SEEK_SET) < 0)
-            elog(STOP, "ReadRecord: lseek(logfile %u seg %u off %u) failed: %m",
-                 readId, readSeg, readOff);
-        if (write(readFile, readBuf, BLCKSZ) != BLCKSZ)
-            elog(STOP, "ReadRecord: write(logfile %u seg %u off %u) failed: %m",
+        if (pg_pwrite(readFile, readBuf, BLCKSZ, (readOff * BLCKSZ)) != BLCKSZ)
+            elog(STOP, "ReadRecord: pg_pwrite(logfile %u seg %u off %u) failed: %m",
                  readId, readSeg, readOff);
         readOff++;
     }
@@ -1797,6 +1806,39 @@
     return buf;
 }

+
+#ifdef    _HAVE_MMAP
+/*
+ * Map XLogSegSize bytes of /dev/zero read-only and store the mapping in
+ * zmmap, so that a new log segment can be zero-filled with one write().
+ *
+ * Any failure is logged and leaves zmmap NULL; the segment-creation code
+ * tests for NULL and then uses its block-at-a-time zero-fill loop instead.
+ */
+static void
+ZeroMapInit(void)
+{
+    int zfd;
+
+    zfd = BasicOpenFile("/dev/zero", O_RDONLY, 0);
+    if (zfd < 0) {
+        elog(LOG, "Can't open /dev/zero: %m");
+        return;
+    }
+    zmmap = mmap(NULL, XLogSegSize, PROT_READ, MAP_SHARED, zfd, 0);
+    if (zmmap == MAP_FAILED) {
+        /* mmap() reports failure as MAP_FAILED, never as NULL */
+        elog(LOG, "Can't mmap /dev/zero: %m");
+        zmmap = NULL;
+    }
+    /* the mapping stays valid after its descriptor is closed */
+    close(zfd);
+}
+#else
+#define    ZeroMapInit()
+#endif
+
+
 /*
  * This func must be called ONCE on system startup
  */
@@ -1811,6 +1842,9 @@
     char        buffer[_INTL_MAXLOGRECSZ + SizeOfXLogRecord];

     elog(LOG, "starting up");
+
+    ZeroMapInit();
+
     CritSectionCount++;

     XLogCtl->xlblocks = (XLogRecPtr *) (((char *) XLogCtl) + sizeof(XLogCtlData));


pgsql-patches by date:

Previous
From: Cedar Cox
Date:
Subject: Re: [ODBC] ODBC Driver regedit file.
Next
From: Matthew Kirkwood
Date:
Subject: Small misc tidyup patch