Re: A patch for xlog.c - Mailing list pgsql-patches

From Bruce Momjian
Subject Re: A patch for xlog.c
Date
Msg-id 200102242101.QAA08185@candle.pha.pa.us
Whole thread Raw
In response to A patch for xlog.c  (Matthew Kirkwood <matthew@hairy.beasts.org>)
Responses Re: A patch for xlog.c
List pgsql-patches
I am confused about why mmap() is better than writing to a real file.
Don't we need to write to a real file so that it is available for
database recovery?


> Hi,
>
> Here is a patch against 7.1beta5 to use mmap(), and thus a
> single write, to initialise xlogs.  It may well improve
> performance of this on platforms/filesystems which write
> metadata synchronously.
>
> It needs a configure test, but certainly builds and runs
> OK.
>
> It also wraps the file reopening in an "ifdef WIN32", since
> it certainly isn't needed for UNIX-like platforms (which I
> assume includes BeOS).
>
> Matthew.
>
>
> diff -ruN postgresql-7.1beta5-clean/src/backend/access/transam/xlog.c postgresql-7.1beta5/src/backend/access/transam/xlog.c
> --- postgresql-7.1beta5-clean/src/backend/access/transam/xlog.c    Fri Feb 23 18:12:00 2001
> +++ postgresql-7.1beta5/src/backend/access/transam/xlog.c    Sat Feb 24 15:23:41 2001
> @@ -24,6 +24,10 @@
>  #include <locale.h>
>  #endif
>
> +#ifdef    _HAVE_MMAP
> +#include <sys/mman.h>
> +#endif
> +
>  #include "access/transam.h"
>  #include "access/xact.h"
>  #include "catalog/catversion.h"
> @@ -36,6 +40,7 @@
>  #include "access/xlogutils.h"
>  #include "utils/builtins.h"
>  #include "utils/relcache.h"
> +#include "utils/pfile.h"
>
>  #include "miscadmin.h"
>
> @@ -53,6 +58,10 @@
>  StartUpID    ThisStartUpID = 0;
>  XLogRecPtr    RedoRecPtr;
>
> +#ifdef    _HAVE_MMAP
> +void        *zmmap = NULL;
> +#endif
> +
>  int            XLOG_DEBUG = 0;
>
>  /* To read/update control file and create new log file */
> @@ -955,7 +964,6 @@
>  {
>      char        path[MAXPGPATH];
>      char        tpath[MAXPGPATH];
> -    char        zbuffer[BLCKSZ];
>      int            fd;
>      int            nbytes;
>
> @@ -987,28 +995,36 @@
>          elog(STOP, "InitCreate(logfile %u seg %u) failed: %m",
>               logId, logSeg);
>
> -    /*
> -     * Zero-fill the file.  We have to do this the hard way to ensure that
> -     * all the file space has really been allocated --- on platforms that
> -     * allow "holes" in files, just seeking to the end doesn't allocate
> -     * intermediate space.  This way, we know that we have all the space
> -     * and (after the fsync below) that all the indirect blocks are down
> -     * on disk.  Therefore, fdatasync(2) will be sufficient to sync future
> -     * writes to the log file.
> -     */
> -    MemSet(zbuffer, 0, sizeof(zbuffer));
> -    for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(zbuffer))
> +#ifdef    _HAVE_MMAP
> +    if (!zmmap || (write(fd, zmmap, XLogSegSize) != XLogSegSize))
> +#endif
>      {
> -        if ((int) write(fd, zbuffer, sizeof(zbuffer)) != (int) sizeof(zbuffer))
> -            elog(STOP, "ZeroFill(logfile %u seg %u) failed: %m",
> -                 logId, logSeg);
> +        /*
> +         * Zero-fill the file.  We have to do this the hard way to ensure that
> +         * all the file space has really been allocated --- on platforms that
> +         * allow "holes" in files, just seeking to the end doesn't allocate
> +         * intermediate space.  This way, we know that we have all the space
> +         * and (after the fsync below) that all the indirect blocks are down
> +         * on disk.  Therefore, fdatasync(2) will be sufficient to sync future
> +         * writes to the log file.
> +         */
> +        char        zbuffer[BLCKSZ];
> +        MemSet(zbuffer, 0, sizeof(zbuffer));
> +        for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(zbuffer))
> +        {
> +            if ((int) write(fd, zbuffer, sizeof(zbuffer)) != (int) sizeof(zbuffer))
> +                elog(STOP, "ZeroFill(logfile %u seg %u) failed: %m",
> +                     logId, logSeg);
> +        }
>      }
>
>      if (pg_fsync(fd) != 0)
>          elog(STOP, "fsync(logfile %u seg %u) failed: %m",
>               logId, logSeg);
>
> +#ifdef    WIN32
>      close(fd);
> +#endif
>
>      /*
>       * Prefer link() to rename() here just to be sure that we don't overwrite
> @@ -1026,10 +1042,12 @@
>               logId, logSeg);
>  #endif
>
> +#ifdef    WIN32
>      fd = BasicOpenFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
>      if (fd < 0)
>          elog(STOP, "InitReopen(logfile %u seg %u) failed: %m",
>               logId, logSeg);
> +#endif
>
>      return (fd);
>  }
> @@ -1255,11 +1273,8 @@
>      if (noBlck || readOff != (RecPtr->xrecoff % XLogSegSize) / BLCKSZ)
>      {
>          readOff = (RecPtr->xrecoff % XLogSegSize) / BLCKSZ;
> -        if (lseek(readFile, (off_t) (readOff * BLCKSZ), SEEK_SET) < 0)
> -            elog(STOP, "ReadRecord: lseek(logfile %u seg %u off %u) failed: %m",
> -                 readId, readSeg, readOff);
> -        if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
> -            elog(STOP, "ReadRecord: read(logfile %u seg %u off %u) failed: %m",
> +        if (pg_pread(readFile, readBuf, BLCKSZ, (readOff * BLCKSZ)) != BLCKSZ)
> +            elog(STOP, "ReadRecord: pg_pread(logfile %u seg %u off %u) failed: %m",
>                   readId, readSeg, readOff);
>          if (((XLogPageHeader) readBuf)->xlp_magic != XLOG_PAGE_MAGIC)
>          {
> @@ -1415,19 +1430,13 @@
>          elog(LOG, "Formatting logfile %u seg %u block %u at offset %u",
>               readId, readSeg, readOff, EndRecPtr.xrecoff % BLCKSZ);
>          readFile = XLogFileOpen(readId, readSeg, false);
> -        if (lseek(readFile, (off_t) (readOff * BLCKSZ), SEEK_SET) < 0)
> -            elog(STOP, "ReadRecord: lseek(logfile %u seg %u off %u) failed: %m",
> -                 readId, readSeg, readOff);
> -        if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
> -            elog(STOP, "ReadRecord: read(logfile %u seg %u off %u) failed: %m",
> +        if (pg_pread(readFile, readBuf, BLCKSZ, (readOff * BLCKSZ)) != BLCKSZ)
> +            elog(STOP, "ReadRecord: pg_pread(logfile %u seg %u off %u) failed: %m",
>                   readId, readSeg, readOff);
>          memset(readBuf + EndRecPtr.xrecoff % BLCKSZ, 0,
>                 BLCKSZ - EndRecPtr.xrecoff % BLCKSZ);
> -        if (lseek(readFile, (off_t) (readOff * BLCKSZ), SEEK_SET) < 0)
> -            elog(STOP, "ReadRecord: lseek(logfile %u seg %u off %u) failed: %m",
> -                 readId, readSeg, readOff);
> -        if (write(readFile, readBuf, BLCKSZ) != BLCKSZ)
> -            elog(STOP, "ReadRecord: write(logfile %u seg %u off %u) failed: %m",
> +        if (pg_pwrite(readFile, readBuf, BLCKSZ, (readOff * BLCKSZ)) != BLCKSZ)
> +            elog(STOP, "ReadRecord: pg_pwrite(logfile %u seg %u off %u) failed: %m",
>                   readId, readSeg, readOff);
>          readOff++;
>      }
> @@ -1797,6 +1806,28 @@
>      return buf;
>  }
>
> +
> +#ifdef    _HAVE_MMAP
> +static void
> +ZeroMapInit(void)
> +{
> +    int zfd;
> +
> +    zfd = BasicOpenFile("/dev/zero", O_RDONLY, 0);
> +    if (zfd < 0) {
> +        elog(LOG, "Can't open /dev/zero: %m");
> +        return;
> +    }
> +    zmmap = mmap(NULL, XLogSegSize, PROT_READ, MAP_SHARED, zfd, 0);
> +    if (!zmmap)
> +        elog(LOG, "Can't mmap /dev/zero: %m");
> +    close(zfd);
> +}
> +#else
> +#define    ZeroMapInit()
> +#endif
> +
> +
>  /*
>   * This func must be called ONCE on system startup
>   */
> @@ -1811,6 +1842,9 @@
>      char        buffer[_INTL_MAXLOGRECSZ + SizeOfXLogRecord];
>
>      elog(LOG, "starting up");
> +
> +    ZeroMapInit();
> +
>      CritSectionCount++;
>
>      XLogCtl->xlblocks = (XLogRecPtr *) (((char *) XLogCtl) + sizeof(XLogCtlData));
>
>


--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026

pgsql-patches by date:

Previous
From: Matthew Kirkwood
Date:
Subject: Re: A patch for xlog.c
Next
From: Tom Lane
Date:
Subject: Re: A patch for xlog.c