Re: A patch for xlog.c - Mailing list pgsql-patches
From | Bruce Momjian |
---|---|
Subject | Re: A patch for xlog.c |
Date | |
Msg-id | 200102242101.QAA08185@candle.pha.pa.us Whole thread Raw |
In response to | A patch for xlog.c (Matthew Kirkwood <matthew@hairy.beasts.org>) |
Responses | Re: A patch for xlog.c |
List | pgsql-patches |
I am confused why mmap() is better than writing to a real file. Don't we need to write to a real file so it is available for database recovery?

> Hi,
>
> Here is a patch against 7.1beta5 to use mmap(), and thus a
> single write, to initialise xlogs. It may well improve
> performance of this on platforms/filesystems which write
> metadata synchronously.
>
> It needs a configure test, but certainly builds and runs
> OK.
>
> It also wraps the file reopening in an "ifdef WIN32", since
> it certainly isn't needed for UNIX-like platforms (which I
> assume includes BeOS).
>
> Matthew.
>
>
> diff -ruN postgresql-7.1beta5-clean/src/backend/access/transam/xlog.c postgresql-7.1beta5/src/backend/access/transam/xlog.c
> --- postgresql-7.1beta5-clean/src/backend/access/transam/xlog.c Fri Feb 23 18:12:00 2001
> +++ postgresql-7.1beta5/src/backend/access/transam/xlog.c Sat Feb 24 15:23:41 2001
> @@ -24,6 +24,10 @@
> #include <locale.h>
> #endif
>
> +#ifdef _HAVE_MMAP
> +#include <sys/mman.h>
> +#endif
> +
> #include "access/transam.h"
> #include "access/xact.h"
> #include "catalog/catversion.h"
> @@ -36,6 +40,7 @@
> #include "access/xlogutils.h"
> #include "utils/builtins.h"
> #include "utils/relcache.h"
> +#include "utils/pfile.h"
>
> #include "miscadmin.h"
>
> @@ -53,6 +58,10 @@
> StartUpID ThisStartUpID = 0;
> XLogRecPtr RedoRecPtr;
>
> +#ifdef _HAVE_MMAP
> +void *zmmap = NULL;
> +#endif
> +
> int XLOG_DEBUG = 0;
>
> /* To read/update control file and create new log file */
> @@ -955,7 +964,6 @@
> {
> char path[MAXPGPATH];
> char tpath[MAXPGPATH];
> - char zbuffer[BLCKSZ];
> int fd;
> int nbytes;
>
> @@ -987,28 +995,36 @@
> elog(STOP, "InitCreate(logfile %u seg %u) failed: %m",
> logId, logSeg);
>
> - /*
> - * Zero-fill the file. We have to do this the hard way to ensure that
> - * all the file space has really been allocated --- on platforms that
> - * allow "holes" in files, just seeking to the end doesn't allocate
> - * intermediate space. This way, we know that we have all the space
> - * and (after the fsync below) that all the indirect blocks are down
> - * on disk. Therefore, fdatasync(2) will be sufficient to sync future
> - * writes to the log file.
> - */
> - MemSet(zbuffer, 0, sizeof(zbuffer));
> - for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(zbuffer))
> +#ifdef _HAVE_MMAP
> + if (!zmmap || (write(fd, zmmap, XLogSegSize) != XLogSegSize))
> +#endif
> {
> - if ((int) write(fd, zbuffer, sizeof(zbuffer)) != (int) sizeof(zbuffer))
> - elog(STOP, "ZeroFill(logfile %u seg %u) failed: %m",
> - logId, logSeg);
> + /*
> + * Zero-fill the file. We have to do this the hard way to ensure that
> + * all the file space has really been allocated --- on platforms that
> + * allow "holes" in files, just seeking to the end doesn't allocate
> + * intermediate space. This way, we know that we have all the space
> + * and (after the fsync below) that all the indirect blocks are down
> + * on disk. Therefore, fdatasync(2) will be sufficient to sync future
> + * writes to the log file.
> + */
> + char zbuffer[BLCKSZ];
> + MemSet(zbuffer, 0, sizeof(zbuffer));
> + for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(zbuffer))
> + {
> + if ((int) write(fd, zbuffer, sizeof(zbuffer)) != (int) sizeof(zbuffer))
> + elog(STOP, "ZeroFill(logfile %u seg %u) failed: %m",
> + logId, logSeg);
> + }
> }
>
> if (pg_fsync(fd) != 0)
> elog(STOP, "fsync(logfile %u seg %u) failed: %m",
> logId, logSeg);
>
> +#ifdef WIN32
> close(fd);
> +#endif
>
> /*
> * Prefer link() to rename() here just to be sure that we don't overwrite
> @@ -1026,10 +1042,12 @@
> logId, logSeg);
> #endif
>
> +#ifdef WIN32
> fd = BasicOpenFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
> if (fd < 0)
> elog(STOP, "InitReopen(logfile %u seg %u) failed: %m",
> logId, logSeg);
> +#endif
>
> return (fd);
> }
> @@ -1255,11 +1273,8 @@
> if (noBlck || readOff != (RecPtr->xrecoff % XLogSegSize) / BLCKSZ)
> {
> readOff = (RecPtr->xrecoff % XLogSegSize) / BLCKSZ;
> - if (lseek(readFile, (off_t) (readOff * BLCKSZ), SEEK_SET) < 0)
> - elog(STOP, "ReadRecord: lseek(logfile %u seg %u off %u) failed: %m",
> - readId, readSeg, readOff);
> - if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
> - elog(STOP, "ReadRecord: read(logfile %u seg %u off %u) failed: %m",
> + if (pg_pread(readFile, readBuf, BLCKSZ, (readOff * BLCKSZ)) != BLCKSZ)
> + elog(STOP, "ReadRecord: pg_pread(logfile %u seg %u off %u) failed: %m",
> readId, readSeg, readOff);
> if (((XLogPageHeader) readBuf)->xlp_magic != XLOG_PAGE_MAGIC)
> {
> @@ -1415,19 +1430,13 @@
> elog(LOG, "Formatting logfile %u seg %u block %u at offset %u",
> readId, readSeg, readOff, EndRecPtr.xrecoff % BLCKSZ);
> readFile = XLogFileOpen(readId, readSeg, false);
> - if (lseek(readFile, (off_t) (readOff * BLCKSZ), SEEK_SET) < 0)
> - elog(STOP, "ReadRecord: lseek(logfile %u seg %u off %u) failed: %m",
> - readId, readSeg, readOff);
> - if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
> - elog(STOP, "ReadRecord: read(logfile %u seg %u off %u) failed: %m",
> + if (pg_pread(readFile, readBuf, BLCKSZ, (readOff * BLCKSZ)) != BLCKSZ)
> + elog(STOP, "ReadRecord: pg_pread(logfile %u seg %u off %u) failed: %m",
> readId, readSeg, readOff);
> memset(readBuf + EndRecPtr.xrecoff % BLCKSZ, 0,
> BLCKSZ - EndRecPtr.xrecoff % BLCKSZ);
> - if (lseek(readFile, (off_t) (readOff * BLCKSZ), SEEK_SET) < 0)
> - elog(STOP, "ReadRecord: lseek(logfile %u seg %u off %u) failed: %m",
> - readId, readSeg, readOff);
> - if (write(readFile, readBuf, BLCKSZ) != BLCKSZ)
> - elog(STOP, "ReadRecord: write(logfile %u seg %u off %u) failed: %m",
> + if (pg_pwrite(readFile, readBuf, BLCKSZ, (readOff * BLCKSZ)) != BLCKSZ)
> + elog(STOP, "ReadRecord: pg_pwrite(logfile %u seg %u off %u) failed: %m",
> readId, readSeg, readOff);
> readOff++;
> }
> @@ -1797,6 +1806,28 @@
> return buf;
> }
>
> +
> +#ifdef _HAVE_MMAP
> +static void
> +ZeroMapInit(void)
> +{
> + int zfd;
> +
> + zfd = BasicOpenFile("/dev/zero", O_RDONLY, 0);
> + if (zfd < 0) {
> + elog(LOG, "Can't open /dev/zero: %m");
> + return;
> + }
> + zmmap = mmap(NULL, XLogSegSize, PROT_READ, MAP_SHARED, zfd, 0);
> + if (!zmmap)
> + elog(LOG, "Can't mmap /dev/zero: %m");
> + close(zfd);
> +}
> +#else
> +#define ZeroMapInit()
> +#endif
> +
> +
> /*
> * This func must be called ONCE on system startup
> */
> @@ -1811,6 +1842,9 @@
> char buffer[_INTL_MAXLOGRECSZ + SizeOfXLogRecord];
>
> elog(LOG, "starting up");
> +
> + ZeroMapInit();
> +
> CritSectionCount++;
>
> XLogCtl->xlblocks = (XLogRecPtr *) (((char *) XLogCtl) + sizeof(XLogCtlData));
>
>

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026
pgsql-patches by date: