Re: [COMMITTERS] pgsql: On Windows, when a file is deleted and another process still has - Mailing list pgsql-hackers

From Heikki Linnakangas
Subject Re: [COMMITTERS] pgsql: On Windows, when a file is deleted and another process still has
Date
Msg-id 4AAA0DFD.70500@enterprisedb.com
Whole thread Raw
Responses Re: Re: [COMMITTERS] pgsql: On Windows, when a file is deleted and another process still has
Re: Re: [COMMITTERS] pgsql: On Windows, when a file is deleted and another process still has
List pgsql-hackers
(moving to pgsql-hackers)

Tom Lane wrote:
> Heikki Linnakangas <heikki.linnakangas@enterprisedb.com> writes:
>> A completely different approach would be to treat any failure on all
>> platforms as non-fatal. We shouldn't really cut the checkpoint short if
>> recycling a WAL file fails, whatever the reason. That seems like a more
>> robust approach than trying to guess which error codes are OK to ignore.
>
> I could live with that, as long as it gets logged.

Here's a patch implementing that, and changing pgrename() to check for
ERROR_SHARING_VIOLATION and ERROR_LOCK_VIOLATION like pgwin32_open()
does, instead of ERROR_ACCESS_DENIED.

I wonder if we should reduce the timeout in pgrename(). It's 30 s at the
moment, but apparently it hasn't been working correctly: if the file is
locked, it fails immediately instead of waiting. And no-one has complained
about that. But if we sleep in InstallXLogFileSegment(), we're holding
ControlFileLock, which can force other backends to wait, and that might
cause more harm than just failing outright. Something like 5 seconds
might be more appropriate, giving anti-virus and similar software some
time to give up the lock, but not so much as to cause long delays. 5
seconds should be enough for anti-virus or backup software to process a
file under normal circumstances.

OTOH, pgwin32_open() uses 30 s, with the same potential for lockups, and
no-one has complained about that either. The bottom line is that if
another program keeps a file locked for any extended period of time,
you're going to have trouble one way or another.

--
  Heikki Linnakangas
  EnterpriseDB   http://www.enterprisedb.com
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
***************
*** 2262,2273 **** XLogFileInit(uint32 log, uint32 seg,
                                  *use_existent, &max_advance,
                                  use_lock))
      {
!         /* No need for any more future segments... */
          unlink(tmppath);
      }

-     elog(DEBUG2, "done creating and filling new WAL file");
-
      /* Set flag to tell caller there was no existent file */
      *use_existent = false;

--- 2262,2275 ----
                                  *use_existent, &max_advance,
                                  use_lock))
      {
!         /*
!          * No need for any more future segments, or InstallXLogFileSegment()
!          * failed to rename the file into place. If the rename failed, opening
!          * the file below will fail.
!          */
          unlink(tmppath);
      }

      /* Set flag to tell caller there was no existent file */
      *use_existent = false;

***************
*** 2280,2285 **** XLogFileInit(uint32 log, uint32 seg,
--- 2282,2289 ----
             errmsg("could not open file \"%s\" (log file %u, segment %u): %m",
                    path, log, seg)));

+     elog(DEBUG2, "done creating and filling new WAL file");
+
      return fd;
  }

***************
*** 2409,2418 **** XLogFileCopy(uint32 log, uint32 seg,
   * place.  This should be TRUE except during bootstrap log creation.  The
   * caller must *not* hold the lock at call.
   *
!  * Returns TRUE if file installed, FALSE if not installed because of
!  * exceeding max_advance limit.  On Windows, we also return FALSE if we
!  * can't rename the file into place because someone's got it open.
!  * (Any other kind of failure causes ereport().)
   */
  static bool
  InstallXLogFileSegment(uint32 *log, uint32 *seg, char *tmppath,
--- 2413,2421 ----
   * place.  This should be TRUE except during bootstrap log creation.  The
   * caller must *not* hold the lock at call.
   *
!  * Returns TRUE if the file was installed successfully. FALSE indicates that
!  * max_advance limit was exceeded, or an error occurred while renaming the
!  * file into place.
   */
  static bool
  InstallXLogFileSegment(uint32 *log, uint32 *seg, char *tmppath,
***************
*** 2460,2490 **** InstallXLogFileSegment(uint32 *log, uint32 *seg, char *tmppath,
       */
  #if HAVE_WORKING_LINK
      if (link(tmppath, path) < 0)
!         ereport(ERROR,
                  (errcode_for_file_access(),
                   errmsg("could not link file \"%s\" to \"%s\" (initialization of log file %u, segment %u): %m",
                          tmppath, path, *log, *seg)));
      unlink(tmppath);
  #else
      if (rename(tmppath, path) < 0)
      {
! #ifdef WIN32
! #if !defined(__CYGWIN__)
!         if (GetLastError() == ERROR_ACCESS_DENIED)
! #else
!         if (errno == EACCES)
! #endif
!         {
!             if (use_lock)
!                 LWLockRelease(ControlFileLock);
!             return false;
!         }
! #endif   /* WIN32 */
!
!         ereport(ERROR,
                  (errcode_for_file_access(),
                   errmsg("could not rename file \"%s\" to \"%s\" (initialization of log file %u, segment %u): %m",
                          tmppath, path, *log, *seg)));
      }
  #endif

--- 2463,2488 ----
       */
  #if HAVE_WORKING_LINK
      if (link(tmppath, path) < 0)
!     {
!         if (use_lock)
!             LWLockRelease(ControlFileLock);
!         ereport(LOG,
                  (errcode_for_file_access(),
                   errmsg("could not link file \"%s\" to \"%s\" (initialization of log file %u, segment %u): %m",
                          tmppath, path, *log, *seg)));
+         return false;
+     }
      unlink(tmppath);
  #else
      if (rename(tmppath, path) < 0)
      {
!         if (use_lock)
!             LWLockRelease(ControlFileLock);
!         ereport(LOG,
                  (errcode_for_file_access(),
                   errmsg("could not rename file \"%s\" to \"%s\" (initialization of log file %u, segment %u): %m",
                          tmppath, path, *log, *seg)));
+         return false;
      }
  #endif

***************
*** 3128,3146 **** RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr)
                       */
                      snprintf(newpath, MAXPGPATH, "%s.deleted", path);
                      if (rename(path, newpath) != 0)
!                         ereport(ERROR,
                                  (errcode_for_file_access(),
!                                  errmsg("could not rename old transaction log file \"%s\"",
                                          path)));
                      rc = unlink(newpath);
  #else
                      rc = unlink(path);
  #endif
                      if (rc != 0)
!                         ereport(ERROR,
                                  (errcode_for_file_access(),
                                   errmsg("could not remove old transaction log file \"%s\": %m",
                                          path)));
                      CheckpointStats.ckpt_segs_removed++;
                  }

--- 3126,3150 ----
                       */
                      snprintf(newpath, MAXPGPATH, "%s.deleted", path);
                      if (rename(path, newpath) != 0)
!                     {
!                         ereport(LOG,
                                  (errcode_for_file_access(),
!                                  errmsg("could not rename old transaction log file \"%s\": %m",
                                          path)));
+                         continue;
+                     }
                      rc = unlink(newpath);
  #else
                      rc = unlink(path);
  #endif
                      if (rc != 0)
!                     {
!                         ereport(LOG,
                                  (errcode_for_file_access(),
                                   errmsg("could not remove old transaction log file \"%s\": %m",
                                          path)));
+                         continue;
+                     }
                      CheckpointStats.ckpt_segs_removed++;
                  }

*** a/src/port/dirmod.c
--- b/src/port/dirmod.c
***************
*** 129,139 **** pgrename(const char *from, const char *to)
  #endif
      {
  #if defined(WIN32) && !defined(__CYGWIN__)
!         if (GetLastError() != ERROR_ACCESS_DENIED)
  #else
          if (errno != EACCES)
  #endif
-             /* set errno? */
              return -1;
          if (++loops > 300)        /* time out after 30 sec */
              return -1;
--- 129,143 ----
  #endif
      {
  #if defined(WIN32) && !defined(__CYGWIN__)
!         DWORD        err = GetLastError();
!
!         _dosmaperr(err);
!
!         if (err != ERROR_SHARING_VIOLATION &&
!             err != ERROR_LOCK_VIOLATION)
  #else
          if (errno != EACCES)
  #endif
              return -1;
          if (++loops > 300)        /* time out after 30 sec */
              return -1;

pgsql-hackers by date:

Previous
From: Dimitri Fontaine
Date:
Subject: Re: Ragged CSV import
Next
From: Itagaki Takahiro
Date:
Subject: Why does LOG have higher priority than ERROR and WARNING?