Thread: A patch for xlog.c
Hi, Here is a patch against 7.1beta5 to use mmap(), and thus a single write, to initialise xlogs. It may well improve performance of this on platforms/filesystems which write metadata synchronously. It needs a configure test, but certainly builds and runs OK. It also wraps the file reopening in an "ifdef WIN32", since it certainly isn't needed for UNIX-like platforms (which I assume includes BeOS). Matthew. diff -ruN postgresql-7.1beta5-clean/src/backend/access/transam/xlog.c postgresql-7.1beta5/src/backend/access/transam/xlog.c --- postgresql-7.1beta5-clean/src/backend/access/transam/xlog.c Fri Feb 23 18:12:00 2001 +++ postgresql-7.1beta5/src/backend/access/transam/xlog.c Sat Feb 24 15:23:41 2001 @@ -24,6 +24,10 @@ #include <locale.h> #endif +#ifdef _HAVE_MMAP +#include <sys/mman.h> +#endif + #include "access/transam.h" #include "access/xact.h" #include "catalog/catversion.h" @@ -36,6 +40,7 @@ #include "access/xlogutils.h" #include "utils/builtins.h" #include "utils/relcache.h" +#include "utils/pfile.h" #include "miscadmin.h" @@ -53,6 +58,10 @@ StartUpID ThisStartUpID = 0; XLogRecPtr RedoRecPtr; +#ifdef _HAVE_MMAP +void *zmmap = NULL; +#endif + int XLOG_DEBUG = 0; /* To read/update control file and create new log file */ @@ -955,7 +964,6 @@ { char path[MAXPGPATH]; char tpath[MAXPGPATH]; - char zbuffer[BLCKSZ]; int fd; int nbytes; @@ -987,28 +995,36 @@ elog(STOP, "InitCreate(logfile %u seg %u) failed: %m", logId, logSeg); - /* - * Zero-fill the file. We have to do this the hard way to ensure that - * all the file space has really been allocated --- on platforms that - * allow "holes" in files, just seeking to the end doesn't allocate - * intermediate space. This way, we know that we have all the space - * and (after the fsync below) that all the indirect blocks are down - * on disk. Therefore, fdatasync(2) will be sufficient to sync future - * writes to the log file. 
- */ - MemSet(zbuffer, 0, sizeof(zbuffer)); - for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(zbuffer)) +#ifdef _HAVE_MMAP + if (!zmmap || (write(fd, zmmap, XLogSegSize) != XLogSegSize)) +#endif { - if ((int) write(fd, zbuffer, sizeof(zbuffer)) != (int) sizeof(zbuffer)) - elog(STOP, "ZeroFill(logfile %u seg %u) failed: %m", - logId, logSeg); + /* + * Zero-fill the file. We have to do this the hard way to ensure that + * all the file space has really been allocated --- on platforms that + * allow "holes" in files, just seeking to the end doesn't allocate + * intermediate space. This way, we know that we have all the space + * and (after the fsync below) that all the indirect blocks are down + * on disk. Therefore, fdatasync(2) will be sufficient to sync future + * writes to the log file. + */ + char zbuffer[BLCKSZ]; + MemSet(zbuffer, 0, sizeof(zbuffer)); + for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(zbuffer)) + { + if ((int) write(fd, zbuffer, sizeof(zbuffer)) != (int) sizeof(zbuffer)) + elog(STOP, "ZeroFill(logfile %u seg %u) failed: %m", + logId, logSeg); + } } if (pg_fsync(fd) != 0) elog(STOP, "fsync(logfile %u seg %u) failed: %m", logId, logSeg); +#ifdef WIN32 close(fd); +#endif /* * Prefer link() to rename() here just to be sure that we don't overwrite @@ -1026,10 +1042,12 @@ logId, logSeg); #endif +#ifdef WIN32 fd = BasicOpenFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR); if (fd < 0) elog(STOP, "InitReopen(logfile %u seg %u) failed: %m", logId, logSeg); +#endif return (fd); } @@ -1255,11 +1273,8 @@ if (noBlck || readOff != (RecPtr->xrecoff % XLogSegSize) / BLCKSZ) { readOff = (RecPtr->xrecoff % XLogSegSize) / BLCKSZ; - if (lseek(readFile, (off_t) (readOff * BLCKSZ), SEEK_SET) < 0) - elog(STOP, "ReadRecord: lseek(logfile %u seg %u off %u) failed: %m", - readId, readSeg, readOff); - if (read(readFile, readBuf, BLCKSZ) != BLCKSZ) - elog(STOP, "ReadRecord: read(logfile %u seg %u off %u) failed: %m", + if (pg_pread(readFile, readBuf, BLCKSZ, 
(readOff * BLCKSZ)) != BLCKSZ) + elog(STOP, "ReadRecord: pg_pread(logfile %u seg %u off %u) failed: %m", readId, readSeg, readOff); if (((XLogPageHeader) readBuf)->xlp_magic != XLOG_PAGE_MAGIC) { @@ -1415,19 +1430,13 @@ elog(LOG, "Formatting logfile %u seg %u block %u at offset %u", readId, readSeg, readOff, EndRecPtr.xrecoff % BLCKSZ); readFile = XLogFileOpen(readId, readSeg, false); - if (lseek(readFile, (off_t) (readOff * BLCKSZ), SEEK_SET) < 0) - elog(STOP, "ReadRecord: lseek(logfile %u seg %u off %u) failed: %m", - readId, readSeg, readOff); - if (read(readFile, readBuf, BLCKSZ) != BLCKSZ) - elog(STOP, "ReadRecord: read(logfile %u seg %u off %u) failed: %m", + if (pg_pread(readFile, readBuf, BLCKSZ, (readOff * BLCKSZ)) != BLCKSZ) + elog(STOP, "ReadRecord: pg_pread(logfile %u seg %u off %u) failed: %m", readId, readSeg, readOff); memset(readBuf + EndRecPtr.xrecoff % BLCKSZ, 0, BLCKSZ - EndRecPtr.xrecoff % BLCKSZ); - if (lseek(readFile, (off_t) (readOff * BLCKSZ), SEEK_SET) < 0) - elog(STOP, "ReadRecord: lseek(logfile %u seg %u off %u) failed: %m", - readId, readSeg, readOff); - if (write(readFile, readBuf, BLCKSZ) != BLCKSZ) - elog(STOP, "ReadRecord: write(logfile %u seg %u off %u) failed: %m", + if (pg_pwrite(readFile, readBuf, BLCKSZ, (readOff * BLCKSZ)) != BLCKSZ) + elog(STOP, "ReadRecord: pg_pwrite(logfile %u seg %u off %u) failed: %m", readId, readSeg, readOff); readOff++; } @@ -1797,6 +1806,28 @@ return buf; } + +#ifdef _HAVE_MMAP +static void +ZeroMapInit(void) +{ + int zfd; + + zfd = BasicOpenFile("/dev/zero", O_RDONLY, 0); + if (zfd < 0) { + elog(LOG, "Can't open /dev/zero: %m"); + return; + } + zmmap = mmap(NULL, XLogSegSize, PROT_READ, MAP_SHARED, zfd, 0); + if (!zmmap) + elog(LOG, "Can't mmap /dev/zero: %m"); + close(zfd); +} +#else +#define ZeroMapInit() +#endif + + /* * This func must be called ONCE on system startup */ @@ -1811,6 +1842,9 @@ char buffer[_INTL_MAXLOGRECSZ + SizeOfXLogRecord]; elog(LOG, "starting up"); + + ZeroMapInit(); + 
CritSectionCount++; XLogCtl->xlblocks = (XLogRecPtr *) (((char *) XLogCtl) + sizeof(XLogCtlData));
Matthew Kirkwood <matthew@hairy.beasts.org> writes: > Here is a patch against 7.1beta5 to use mmap(), and thus a > single write, to initialise xlogs. It may well improve > performance of this on platforms/filesystems which write > metadata synchronously. Have you *demonstrated* any actual performance improvement from this? How much? On what platforms? I don't believe in adding unportable alternative implementations without pretty darn compelling reasons ... regards, tom lane
On Sat, 24 Feb 2001, Tom Lane wrote: > > Here is a patch against 7.1beta5 to use mmap(), and thus a > > single write, to initialise xlogs. It may well improve > > performance of this on platforms/filesystems which write > > metadata synchronously. > > Have you *demonstrated* any actual performance improvement from this? > How much? On what platforms? Forgive me if I posted it to the wrong place -- I was far from proposing this for inclusion. It is but a small step on the way to my plan of mmap()ifying all of the WAL stuff (which may also prove a waste of effort). On Linux 2.4 w/asynchronous ext2, it's good for about 5%, which certainly wouldn't alone be worth the effort. I tried synchronous ext2, but the numbers were so poor with both that nobody who cared about performance would be using it (1.2 sec per file, vs. over a minute). I don't have access to any kind of machine running UFS/FFS. Perhaps someone on the list might do me the favour of trying the attached test on such a platform with synchronous metadata writes (see top of file for #ifdefs). > I don't believe in adding unportable alternative implementations > without pretty darn compelling reasons ... mmap() is hardly unportable. From a quick look, all the current names in include/port/ (which must surely make up a vast majority of deployed recent postgresql versions) except QNX and Win32 can support POSIX mmap. Thanks for the reply, Matthew.
Attachment
I am confused why mmap() is better than writing to a real file. Don't we need to write to a real file so it is available for database recovery? > Hi, > > Here is a patch against 7.1beta5 to use mmap(), and thus a > single write, to initialise xlogs. It may well improve > performance of this on platforms/filesystems which write > metadata synchronously. > > It needs a configure test, but certainly builds and runs > OK. > > It also wraps the file reopening in an "ifdef WIN32", since > it certainly isn't needed for UNIX-like platforms (which I > assume includes BeOS). > > Matthew. > > > diff -ruN postgresql-7.1beta5-clean/src/backend/access/transam/xlog.c postgresql-7.1beta5/src/backend/access/transam/xlog.c > --- postgresql-7.1beta5-clean/src/backend/access/transam/xlog.c Fri Feb 23 18:12:00 2001 > +++ postgresql-7.1beta5/src/backend/access/transam/xlog.c Sat Feb 24 15:23:41 2001 > @@ -24,6 +24,10 @@ > #include <locale.h> > #endif > > +#ifdef _HAVE_MMAP > +#include <sys/mman.h> > +#endif > + > #include "access/transam.h" > #include "access/xact.h" > #include "catalog/catversion.h" > @@ -36,6 +40,7 @@ > #include "access/xlogutils.h" > #include "utils/builtins.h" > #include "utils/relcache.h" > +#include "utils/pfile.h" > > #include "miscadmin.h" > > @@ -53,6 +58,10 @@ > StartUpID ThisStartUpID = 0; > XLogRecPtr RedoRecPtr; > > +#ifdef _HAVE_MMAP > +void *zmmap = NULL; > +#endif > + > int XLOG_DEBUG = 0; > > /* To read/update control file and create new log file */ > @@ -955,7 +964,6 @@ > { > char path[MAXPGPATH]; > char tpath[MAXPGPATH]; > - char zbuffer[BLCKSZ]; > int fd; > int nbytes; > > @@ -987,28 +995,36 @@ > elog(STOP, "InitCreate(logfile %u seg %u) failed: %m", > logId, logSeg); > > - /* > - * Zero-fill the file. We have to do this the hard way to ensure that > - * all the file space has really been allocated --- on platforms that > - * allow "holes" in files, just seeking to the end doesn't allocate > - * intermediate space. 
This way, we know that we have all the space > - * and (after the fsync below) that all the indirect blocks are down > - * on disk. Therefore, fdatasync(2) will be sufficient to sync future > - * writes to the log file. > - */ > - MemSet(zbuffer, 0, sizeof(zbuffer)); > - for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(zbuffer)) > +#ifdef _HAVE_MMAP > + if (!zmmap || (write(fd, zmmap, XLogSegSize) != XLogSegSize)) > +#endif > { > - if ((int) write(fd, zbuffer, sizeof(zbuffer)) != (int) sizeof(zbuffer)) > - elog(STOP, "ZeroFill(logfile %u seg %u) failed: %m", > - logId, logSeg); > + /* > + * Zero-fill the file. We have to do this the hard way to ensure that > + * all the file space has really been allocated --- on platforms that > + * allow "holes" in files, just seeking to the end doesn't allocate > + * intermediate space. This way, we know that we have all the space > + * and (after the fsync below) that all the indirect blocks are down > + * on disk. Therefore, fdatasync(2) will be sufficient to sync future > + * writes to the log file. 
> + */ > + char zbuffer[BLCKSZ]; > + MemSet(zbuffer, 0, sizeof(zbuffer)); > + for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(zbuffer)) > + { > + if ((int) write(fd, zbuffer, sizeof(zbuffer)) != (int) sizeof(zbuffer)) > + elog(STOP, "ZeroFill(logfile %u seg %u) failed: %m", > + logId, logSeg); > + } > } > > if (pg_fsync(fd) != 0) > elog(STOP, "fsync(logfile %u seg %u) failed: %m", > logId, logSeg); > > +#ifdef WIN32 > close(fd); > +#endif > > /* > * Prefer link() to rename() here just to be sure that we don't overwrite > @@ -1026,10 +1042,12 @@ > logId, logSeg); > #endif > > +#ifdef WIN32 > fd = BasicOpenFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR); > if (fd < 0) > elog(STOP, "InitReopen(logfile %u seg %u) failed: %m", > logId, logSeg); > +#endif > > return (fd); > } > @@ -1255,11 +1273,8 @@ > if (noBlck || readOff != (RecPtr->xrecoff % XLogSegSize) / BLCKSZ) > { > readOff = (RecPtr->xrecoff % XLogSegSize) / BLCKSZ; > - if (lseek(readFile, (off_t) (readOff * BLCKSZ), SEEK_SET) < 0) > - elog(STOP, "ReadRecord: lseek(logfile %u seg %u off %u) failed: %m", > - readId, readSeg, readOff); > - if (read(readFile, readBuf, BLCKSZ) != BLCKSZ) > - elog(STOP, "ReadRecord: read(logfile %u seg %u off %u) failed: %m", > + if (pg_pread(readFile, readBuf, BLCKSZ, (readOff * BLCKSZ)) != BLCKSZ) > + elog(STOP, "ReadRecord: pg_pread(logfile %u seg %u off %u) failed: %m", > readId, readSeg, readOff); > if (((XLogPageHeader) readBuf)->xlp_magic != XLOG_PAGE_MAGIC) > { > @@ -1415,19 +1430,13 @@ > elog(LOG, "Formatting logfile %u seg %u block %u at offset %u", > readId, readSeg, readOff, EndRecPtr.xrecoff % BLCKSZ); > readFile = XLogFileOpen(readId, readSeg, false); > - if (lseek(readFile, (off_t) (readOff * BLCKSZ), SEEK_SET) < 0) > - elog(STOP, "ReadRecord: lseek(logfile %u seg %u off %u) failed: %m", > - readId, readSeg, readOff); > - if (read(readFile, readBuf, BLCKSZ) != BLCKSZ) > - elog(STOP, "ReadRecord: read(logfile %u seg %u off %u) failed: %m", > + if 
(pg_pread(readFile, readBuf, BLCKSZ, (readOff * BLCKSZ)) != BLCKSZ) > + elog(STOP, "ReadRecord: pg_pread(logfile %u seg %u off %u) failed: %m", > readId, readSeg, readOff); > memset(readBuf + EndRecPtr.xrecoff % BLCKSZ, 0, > BLCKSZ - EndRecPtr.xrecoff % BLCKSZ); > - if (lseek(readFile, (off_t) (readOff * BLCKSZ), SEEK_SET) < 0) > - elog(STOP, "ReadRecord: lseek(logfile %u seg %u off %u) failed: %m", > - readId, readSeg, readOff); > - if (write(readFile, readBuf, BLCKSZ) != BLCKSZ) > - elog(STOP, "ReadRecord: write(logfile %u seg %u off %u) failed: %m", > + if (pg_pwrite(readFile, readBuf, BLCKSZ, (readOff * BLCKSZ)) != BLCKSZ) > + elog(STOP, "ReadRecord: pg_pwrite(logfile %u seg %u off %u) failed: %m", > readId, readSeg, readOff); > readOff++; > } > @@ -1797,6 +1806,28 @@ > return buf; > } > > + > +#ifdef _HAVE_MMAP > +static void > +ZeroMapInit(void) > +{ > + int zfd; > + > + zfd = BasicOpenFile("/dev/zero", O_RDONLY, 0); > + if (zfd < 0) { > + elog(LOG, "Can't open /dev/zero: %m"); > + return; > + } > + zmmap = mmap(NULL, XLogSegSize, PROT_READ, MAP_SHARED, zfd, 0); > + if (!zmmap) > + elog(LOG, "Can't mmap /dev/zero: %m"); > + close(zfd); > +} > +#else > +#define ZeroMapInit() > +#endif > + > + > /* > * This func must be called ONCE on system startup > */ > @@ -1811,6 +1842,9 @@ > char buffer[_INTL_MAXLOGRECSZ + SizeOfXLogRecord]; > > elog(LOG, "starting up"); > + > + ZeroMapInit(); > + > CritSectionCount++; > > XLogCtl->xlblocks = (XLogRecPtr *) (((char *) XLogCtl) + sizeof(XLogCtlData)); > > -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 853-3000 + If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania 19026
Matthew Kirkwood <matthew@hairy.beasts.org> writes: > Forgive me if I posted it to the wrong place -- I was far from > proposing this for inclusion. Diffs posted to pgsql-patches are generally considered to be requests for application of a patch. If this is only an experiment it had best be clearly labeled as such. > It is but a small step on the way to my plan of mmap()ifying all of > the WAL stuff (which may also prove a waste of effort). Very probably. What are your grounds for thinking that's a good idea? I can't see any reason to think that mmap is more efficient than write for simple sequential writes, which is what we need to do. regards, tom lane
On Sat, 24 Feb 2001, Bruce Momjian wrote: > I am confused why mmap() is better than writing to a real file. It isn't, except that it allows the logfile to be initialised in one syscall, without first allocating and zeroing (and hence dirtying) 16Mb of memory. > Don't we need to write to a real file so it is available for database > recovery? The mmap isn't used for the destination, but for the source; it's just a cheap way to get your hands on 16Mb of zeroes. Matthew.
On Sat, 24 Feb 2001, Tom Lane wrote: > > Forgive me if I posted it to the wrong place -- I was far from > > proposing this for inclusion. > > Diffs posted to pgsql-patches are generally considered to be requests > for application of a patch. If this is only an experiment it had best > be clearly labeled as such. OK. Is there a better place for discussion of such? > > It is but a small step on the way to my plan of mmap()ifying all > > of the WAL stuff (which may also prove a waste of effort). > > Very probably. What are your grounds for thinking that's a good idea? > I can't see any reason to think that mmap is more efficient than write > for simple sequential writes, which is what we need to do. Potential pros: a. msync(MS_ASYNC) seems to be exactly b. Potential to reduce contention c. Removing syscalls is rarely a bad thing d. Fewer copies, better cache behaviour Potential cons: a. Portability b. A bad pointer can cause a scribble on the log Matthew.
Matthew Kirkwood <matthew@hairy.beasts.org> writes: >> Diffs posted to pgsql-patches are generally considered to be requests >> for application of a patch. If this is only an experiment it had best >> be clearly labeled as such. > OK. Is there are better place for discussion of such? pgsql-hackers is the place to discuss anything that's experimental or otherwise concerned with future development. > [ possible merits of mmap ] Let's take up that discussion in pghackers. regards, tom lane
> Matthew Kirkwood <matthew@hairy.beasts.org> writes: > >> Diffs posted to pgsql-patches are generally considered to be requests > >> for application of a patch. If this is only an experiment it had best > >> be clearly labeled as such. > > > OK. Is there are better place for discussion of such? > > pgsql-hackers is the place to discuss anything that's experimental or > otherwise concerned with future development. > > > [ possible merits of mmap ] > > Let's take up that discussion in pghackers. I always felt the real benefit of mmap() would be to remove use of SysV shared memory and use anon mmap() to prevent problems with SysV shared memory limits. -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 853-3000 + If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania 19026
On Sat, 24 Feb 2001, Bruce Momjian wrote: > > Matthew Kirkwood <matthew@hairy.beasts.org> writes: > > >> Diffs posted to pgsql-patches are generally considered to be requests > > >> for application of a patch. If this is only an experiment it had best > > >> be clearly labeled as such. > > > > > OK. Is there are better place for discussion of such? > > > > pgsql-hackers is the place to discuss anything that's experimental or > > otherwise concerned with future development. > > > > > [ possible merits of mmap ] > > > > Let's take up that discussion in pghackers. > > I always felt the real benefit of mmap() would be to remove use of SysV > shared memory and use anon mmap() to prevent problems with SysV share > memory limits. You'll still have memory limits to overcome ... per user memory limits being one ... there is no such thing as a 'cure-all' ...
> > > pgsql-hackers is the place to discuss anything that's experimental or > > > otherwise concerned with future development. > > > > > > > [ possible merits of mmap ] > > > > > > Let's take up that discussion in pghackers. > > > > I always felt the real benefit of mmap() would be to remove use of SysV > > shared memory and use anon mmap() to prevent problems with SysV share > > memory limits. > > You'll still have memory limits to overcome ... per user memory limits > being one ... there is no such thing as a 'cure-all' ... Yes, but typical SysV shared memory limits are much lower than per-process limits. -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 853-3000 + If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania 19026
On Sun, 25 Feb 2001, Bruce Momjian wrote: > > > > pgsql-hackers is the place to discuss anything that's experimental or > > > > otherwise concerned with future development. > > > > > > > > > [ possible merits of mmap ] > > > > > > > > Let's take up that discussion in pghackers. > > > > > > I always felt the real benefit of mmap() would be to remove use of SysV > > > shared memory and use anon mmap() to prevent problems with SysV share > > > memory limits. > > > > You'll still have memory limits to overcome ... per user memory limits > > being one ... there is no such thing as a 'cure-all' ... > > Yes, but typical SysV shared memory limits are much lower than > per-process limits. well, come up with suitable patches for v7.2 and we can see where it goes ... you seem to think mmap() will do what we require, but, so far, have been unable to convince anyone to dedicate the time to converting to using it. "having to raise/set SysV limits", IMHO, isn't worth the overhaul that I see having to happen, but, if you can show us the benefits of doing it other than removing a 'one time administrative config' of an OS, I imagine that nobody will be able to argue it ...
> > Yes, but typical SysV shared memory limits are much lower than > > per-process limits. > > well, come up with suitable patches for v7.2 and we can see where it goes > ... you seem to think mmap() will do what we require, but, so far, have > been unable to convince anyone to dedicate the time to converting to using > it. "having to raise/set SysV limits", IMHO, isn't worth the overhaul > that I see having to happen, but, if you can show us the benefits of doing > it other then removing a 'one time administrative config' of an OS, I > imagine that nobody will be able to argue it ... Yea, it is pretty low priority, especially since most OS's don't support ANON mmap(). Most BSD's support it, but I don't think Linux or others do. -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 853-3000 + If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania 19026
On Sun, 25 Feb 2001, Bruce Momjian wrote: > > > Yes, but typical SysV shared memory limits are much lower than > > > per-process limits. > > > > well, come up with suitable patches for v7.2 and we can see where it goes > > ... you seem to think mmap() will do what we require, but, so far, have > > been unable to convince anyone to dedicate the time to converting to using > > it. "having to raise/set SysV limits", IMHO, isn't worth the overhaul > > that I see having to happen, but, if you can show us the benefits of doing > > it other then removing a 'one time administrative config' of an OS, I > > imagine that nobody will be able to argue it ... > > Yea, it is pretty low priority, especially since most OS's don't support > ANON mmap(). Most BSD's support it, but I don't think Linux or others > do. ah, then not a low priority, a non-starter, period ... maybe when all the OSs we support move to supporting ANON mmap() :(
> On Sun, 25 Feb 2001, Bruce Momjian wrote: > > > > > Yes, but typical SysV shared memory limits are much lower than > > > > per-process limits. > > > > > > well, come up with suitable patches for v7.2 and we can see where it goes > > > ... you seem to think mmap() will do what we require, but, so far, have > > > been unable to convince anyone to dedicate the time to converting to using > > > it. "having to raise/set SysV limits", IMHO, isn't worth the overhaul > > > that I see having to happen, but, if you can show us the benefits of doing > > > it other then removing a 'one time administrative config' of an OS, I > > > imagine that nobody will be able to argue it ... > > > > Yea, it is pretty low priority, especially since most OS's don't support > > ANON mmap(). Most BSD's support it, but I don't think Linux or others > > do. > > ah, then not a low priority, a non-starter, period ... maybe when all the > OSs we support move to supporting ANON mmap() :( Yea, we would have to take a poll to see if the majority support it. Right now, I think it is clearly a minority, and not worth the added confusion for a few platforms. -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 853-3000 + If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania 19026
The Hermit Hacker writes: > > Yea, it is pretty low priority, especially since most OS's don't support > > ANON mmap(). Most BSD's support it, but I don't think Linux or others > > do. > > ah, then not a low priority, a non-starter, period ... maybe when all the > OSs we support move to supporting ANON mmap() :( It would be worthwhile for those operating systems that don't have SysV shared memory but do have mmap(). But I don't have one of those, so I ain't gonna do it. ;-) -- Peter Eisentraut peter_e@gmx.net http://yi.org/peter-e/
> The Hermit Hacker writes: > > > > Yea, it is pretty low priority, especially since most OS's don't support > > > ANON mmap(). Most BSD's support it, but I don't think Linux or others > > > do. > > > > ah, then not a low priority, a non-starter, period ... maybe when all the > > OSs we support move to supporting ANON mmap() :( > > It would be worthwhile for those operating systems that don't have SysV > shared memory but do have mmap(). But I don't have one of those, so I > ain't gonna do it. ;-) All have SysV memory. mmap() usage is only useful in enabling larger buffers without kernel changes. -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 853-3000 + If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania 19026
On Tue, 27 Feb 2001, Bruce Momjian wrote: > mmap() usage is only useful in enabling larger > buffers without kernel changes. My plan was not to replace the shared buffer pool with an mmap()ed area, but rather to use mmap() on the data files themselves to eliminate it. Clearly this is rather controversial, since it may have safety implications, but it should allow the kernel better to choose what to cache. Matthew.
Bruce Momjian writes: > All have SysV memory. All that we currently support... -- Peter Eisentraut peter_e@gmx.net http://yi.org/peter-e/