Re: [COMMITTERS] pgsql: On Windows, when a file is deleted and another process still has - Mailing list pgsql-hackers
From | Heikki Linnakangas |
---|---|
Subject | Re: [COMMITTERS] pgsql: On Windows, when a file is deleted and another process still has |
Date | |
Msg-id | 4AAA0DFD.70500@enterprisedb.com Whole thread Raw |
Responses |
Re: Re: [COMMITTERS] pgsql: On Windows, when a file is deleted and another process still has
Re: Re: [COMMITTERS] pgsql: On Windows, when a file is deleted and another process still has |
List | pgsql-hackers |
(moving to pgsql-hackers) Tom Lane wrote: > Heikki Linnakangas <heikki.linnakangas@enterprisedb.com> writes: >> A completely different approach would be to treat any failure on all >> platforms as non-fatal. We shouldn't really cut the checkpoint short if >> recycling a WAL file fails, whatever the reason. That seems like a more >> robust approach than trying to guess which error codes are OK to ignore. > > I could live with that, as long as it gets logged. Here's a patch implementing that, and changing pgrename() to check for ERROR_SHARING_VIOLATION and ERROR_LOCK_VIOLATION like pgwin32_open() does, instead of ERROR_ACCESS_DENIED. I wonder if we should reduce the timeout in pgrename(). It's 30 s at the moment, but apparently it hasn't been working correctly, failing immediately instead if the file is locked. And no-one has complained about that. But if we sleep in InstallXLogFileSegment(), we're holding ControlFileLock, which can force other backends to wait, and that might cause more harm than just failing outright. Something like 5 seconds might be more appropriate, giving anti-virus and similar software some time to give up the lock, but not too much to cause long delays. 5 seconds should be enough for anti-virus or backup software to process a file under normal circumstances. OTOH, pgwin32_open() uses 30 s, with the same potential for lockups, and no-one has complained about that either. The bottom line is that if another program keeps a file locked for any extended period of time, you're going to have trouble one way or another. -- Heikki Linnakangas EnterpriseDB http://www.enterprisedb.com *** a/src/backend/access/transam/xlog.c --- b/src/backend/access/transam/xlog.c *************** *** 2262,2273 **** XLogFileInit(uint32 log, uint32 seg, *use_existent, &max_advance, use_lock)) { ! /* No need for any more future segments... */ unlink(tmppath); } - elog(DEBUG2, "done creating and filling new WAL file"); - /* Set flag to tell caller there was no existent file */ *use_existent = false; --- 2262,2275 ---- *use_existent, &max_advance, use_lock)) { ! /* ! * No need for any more future segments, or InstallXLogFileSegment() ! * failed to rename the file into place. If the rename failed, opening ! * the file below will fail. ! */ unlink(tmppath); } /* Set flag to tell caller there was no existent file */ *use_existent = false; *************** *** 2280,2285 **** XLogFileInit(uint32 log, uint32 seg, --- 2282,2289 ---- errmsg("could not open file \"%s\" (log file %u, segment %u): %m", path, log, seg))); + elog(DEBUG2, "done creating and filling new WAL file"); + return fd; } *************** *** 2409,2418 **** XLogFileCopy(uint32 log, uint32 seg, * place. This should be TRUE except during bootstrap log creation. The * caller must *not* hold the lock at call. * ! * Returns TRUE if file installed, FALSE if not installed because of ! * exceeding max_advance limit. On Windows, we also return FALSE if we ! * can't rename the file into place because someone's got it open. ! * (Any other kind of failure causes ereport().) */ static bool InstallXLogFileSegment(uint32 *log, uint32 *seg, char *tmppath, --- 2413,2421 ---- * place. This should be TRUE except during bootstrap log creation. The * caller must *not* hold the lock at call. * ! * Returns TRUE if the file was installed successfully. FALSE indicates that ! * max_advance limit was exceeded, or an error occurred while renaming the ! * file into place. */ static bool InstallXLogFileSegment(uint32 *log, uint32 *seg, char *tmppath, *************** *** 2460,2490 **** InstallXLogFileSegment(uint32 *log, uint32 *seg, char *tmppath, */ #if HAVE_WORKING_LINK if (link(tmppath, path) < 0) ! ereport(ERROR, (errcode_for_file_access(), errmsg("could not link file \"%s\" to \"%s\" (initialization of log file %u, segment %u): %m", tmppath, path, *log, *seg))); unlink(tmppath); #else if (rename(tmppath, path) < 0) { ! #ifdef WIN32 ! #if !defined(__CYGWIN__) ! if (GetLastError() == ERROR_ACCESS_DENIED) ! #else ! if (errno == EACCES) ! #endif ! { ! if (use_lock) ! LWLockRelease(ControlFileLock); ! return false; ! } ! #endif /* WIN32 */ ! ! ereport(ERROR, (errcode_for_file_access(), errmsg("could not rename file \"%s\" to \"%s\" (initialization of log file %u, segment %u): %m", tmppath, path, *log, *seg))); } #endif --- 2463,2488 ---- */ #if HAVE_WORKING_LINK if (link(tmppath, path) < 0) ! { ! if (use_lock) ! LWLockRelease(ControlFileLock); ! ereport(LOG, (errcode_for_file_access(), errmsg("could not link file \"%s\" to \"%s\" (initialization of log file %u, segment %u): %m", tmppath, path, *log, *seg))); + return false; + } unlink(tmppath); #else if (rename(tmppath, path) < 0) { ! if (use_lock) ! LWLockRelease(ControlFileLock); ! ereport(LOG, (errcode_for_file_access(), errmsg("could not rename file \"%s\" to \"%s\" (initialization of log file %u, segment %u): %m", tmppath, path, *log, *seg))); + return false; } #endif *************** *** 3128,3146 **** RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr) */ snprintf(newpath, MAXPGPATH, "%s.deleted", path); if (rename(path, newpath) != 0) ! ereport(ERROR, (errcode_for_file_access(), ! errmsg("could not rename old transaction log file \"%s\"", path))); rc = unlink(newpath); #else rc = unlink(path); #endif if (rc != 0) ! ereport(ERROR, (errcode_for_file_access(), errmsg("could not remove old transaction log file \"%s\": %m", path))); CheckpointStats.ckpt_segs_removed++; } --- 3126,3150 ---- */ snprintf(newpath, MAXPGPATH, "%s.deleted", path); if (rename(path, newpath) != 0) ! { ! ereport(LOG, (errcode_for_file_access(), ! errmsg("could not rename old transaction log file \"%s\": %m", path))); + continue; + } rc = unlink(newpath); #else rc = unlink(path); #endif if (rc != 0) ! { ! ereport(LOG, (errcode_for_file_access(), errmsg("could not remove old transaction log file \"%s\": %m", path))); + continue; + } CheckpointStats.ckpt_segs_removed++; } *** a/src/port/dirmod.c --- b/src/port/dirmod.c *************** *** 129,139 **** pgrename(const char *from, const char *to) #endif { #if defined(WIN32) && !defined(__CYGWIN__) ! if (GetLastError() != ERROR_ACCESS_DENIED) #else if (errno != EACCES) #endif - /* set errno? */ return -1; if (++loops > 300) /* time out after 30 sec */ return -1; --- 129,143 ---- #endif { #if defined(WIN32) && !defined(__CYGWIN__) ! DWORD err = GetLastError(); ! ! _dosmaperr(err); ! ! if (err != ERROR_SHARING_VIOLATION && ! err != ERROR_LOCK_VIOLATION) #else if (errno != EACCES) #endif return -1; if (++loops > 300) /* time out after 30 sec */ return -1;
pgsql-hackers by date: