Thread: rename/unlink handling for Win32

rename/unlink handling for Win32

From
Bruce Momjian
Date:
Here is my approach to the use of rename/unlink on Win32.  The full
discussion is at:

    http://momjian.postgresql.org/cgi-bin/pgtodo?win32

Basically, rename/unlink will fail if the file is opened.  You can move
the open file to another name, but that then requires open to loop in
case the file is missing.

The following patch retries the rename/unlink every 1/10th of a second,
printing a warning message after 1 second, and printing a completion
message if a warning message was printed.

I looked at PeerDirect's and SRA's port, and neither provides a better
method.  I looked at PeerDirect's and it actually has some conditional
code for rename.  For example, it has a signal that is sent to all
backends to inform them to close their open WAL files.  I am not sure if
that is required for us because there is some replication stuff in
there that we aren't using. Jan?  In a few other places, it allows the
rename to fail.

I am inclined to implement it as shown, then see what delayed
rename/unlinks we get in testing.

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073
Index: configure.in
===================================================================
RCS file: /cvsroot/pgsql-server/configure.in,v
retrieving revision 1.242
diff -c -c -r1.242 configure.in
*** configure.in    6 Apr 2003 22:45:22 -0000    1.242
--- configure.in    20 Apr 2003 02:11:49 -0000
***************
*** 856,863 ****
  esac

  # Solaris has a very slow qsort in certain cases, so we replace it.
! case $host_os in
!   solaris*) AC_LIBOBJ(qsort) ;;
  esac

  # On HPUX 9, rint() is not in regular libm.a but in /lib/pa1.1/libm.a;
--- 856,868 ----
  esac

  # Solaris has a very slow qsort in certain cases, so we replace it.
! case $host_os in solaris*)
! AC_LIBOBJ(qsort) ;;
! esac
!
! # Win32 can't rename or unlink an open file
! case $host_os in win32*)
! AC_LIBOBJ(dirmod) ;;
  esac

  # On HPUX 9, rint() is not in regular libm.a but in /lib/pa1.1/libm.a;
Index: src/include/pg_config_manual.h
===================================================================
RCS file: /cvsroot/pgsql-server/src/include/pg_config_manual.h,v
retrieving revision 1.2
diff -c -c -r1.2 pg_config_manual.h
*** src/include/pg_config_manual.h    18 Apr 2003 01:03:42 -0000    1.2
--- src/include/pg_config_manual.h    20 Apr 2003 02:12:07 -0000
***************
*** 151,156 ****
--- 151,167 ----
  #endif

  /*
+  * Win32 doesn't have reliable rename/unlink during concurrent access
+  */
+ #ifdef WIN32
+ int pgrename(const char *from, const char *to);	/* retrying rename() */
+ int pgunlink(const char *path);	/* retrying unlink() */
+ #define rename(from, to)    pgrename(from, to)
+ #define unlink(path)        pgunlink(path)
+ #endif
+
+
+ /*
   * This is the default directory in which AF_UNIX socket files are
   * placed.  Caution: changing this risks breaking your existing client
   * applications, which are likely to continue to look in the old
#ifndef TEST_VERSION

#undef rename
#undef unlink

/*
 * Translate the Win32 error code left by a failed MoveFileEx() into an
 * errno value, so that callers of pgrename() can inspect errno the way
 * they would after a failed rename().  Codes not listed here are
 * reported as EINVAL.
 */
static int pgrename_errno(DWORD winerr)
{
    switch (winerr)
    {
        case ERROR_FILE_NOT_FOUND:
        case ERROR_PATH_NOT_FOUND:
            return ENOENT;
        case ERROR_FILE_EXISTS:
        case ERROR_ALREADY_EXISTS:
            return EEXIST;
        case ERROR_NOT_ENOUGH_MEMORY:
        case ERROR_OUTOFMEMORY:
            return ENOMEM;
        case ERROR_DISK_FULL:
        case ERROR_HANDLE_DISK_FULL:
            return ENOSPC;
        default:
            return EINVAL;
    }
}

/*
 * pgrename
 *
 * Rename 'from' to 'to', replacing 'to' if it already exists.
 *
 * If the move fails because the file is in use (ERROR_ACCESS_DENIED or
 * ERROR_SHARING_VIOLATION), the move is retried every 100 ms until it
 * succeeds; there is no upper bound on the number of retries.  One
 * warning is logged once the retries have gone on for about a second,
 * and a completion message is logged afterwards if a warning was issued.
 *
 * Returns 0 on success.  On any other failure returns -1 with errno set
 * from the Win32 error code.
 */
int pgrename(const char *from, const char *to)
{
    int loops = 0;

    while (!MoveFileEx(from, to, MOVEFILE_REPLACE_EXISTING))
    {
        DWORD err = GetLastError();

        /* Anything other than "file is in use" is a hard failure. */
        if (err != ERROR_ACCESS_DENIED && err != ERROR_SHARING_VIOLATION)
        {
            errno = pgrename_errno(err);
            return -1;
        }

        Sleep(100);    /* ms */

        /* Warn exactly once, after the first ~1 second of retries. */
        if (loops == 10)
        {
#ifndef FRONTEND
            elog(LOG, "Unable to rename %s to %s, continuing to try", from, to);
#else
            fprintf(stderr, "Unable to rename %s to %s, continuing to try\n", from, to);
#endif
        }
        loops++;
    }

    /* loops > 10 means the warning above was printed; report completion. */
    if (loops > 10)
    {
#ifndef FRONTEND
        elog(LOG, "Completed rename of %s to %s", from, to);
#else
        fprintf(stderr, "Completed rename of %s to %s\n", from, to);
#endif
    }
    return 0;
}


/*
 * pgunlink
 *
 * Remove 'path'.  While unlink() fails with EACCES the call is retried
 * every 100 ms, with no upper bound on the number of retries.  One
 * warning is logged after the retries have gone on for about a second,
 * and a completion message is logged afterwards if a warning was issued.
 *
 * Returns 0 on success.  On any failure other than EACCES returns -1,
 * leaving errno as unlink() set it.
 */
int pgunlink(const char *path)
{
    int attempts;

    for (attempts = 0; unlink(path) != 0; attempts++)
    {
        /* Only EACCES is retried; any other errno is passed back as-is. */
        if (errno != EACCES)
            return -1;

        Sleep(100);    /* ms */

        /* Warn exactly once, on the eleventh failed attempt. */
        if (attempts == 10)
        {
#ifndef FRONTEND
            elog(LOG, "Unable to unlink %s, continuing to try", path);
#else
            fprintf(stderr, "Unable to unlink %s, continuing to try\n", path);
#endif
        }
    }

    /* More than ten failures means the warning was printed. */
    if (attempts > 10)
    {
#ifndef FRONTEND
        elog(LOG, "Completed unlink of %s", path);
#else
        fprintf(stderr, "Completed unlink of %s\n", path);
#endif
    }

    return 0;
}


#else


/*
 *  Illustrates problem with Win32 rename() and unlink()
 *    under concurrent access.
 *
 *    Run with arg '1', then less than 5 seconds later, run with
 *     arg '2' (rename) or '3'(unlink) to see the problem.
 */

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <windows.h>

/*
 * halt(str): write the message 'str' to stderr and terminate the process
 * with exit status 1.  The do { } while (0) wrapper makes the expansion a
 * single statement, so it is safe after an unbraced if/else.
 */
#define halt(str) \
do { \
    fputs(str, stderr); \
    exit(1); \
} while (0)

/*
 * Test driver demonstrating rename/unlink failures on an open file.
 *
 * Takes exactly one argument:
 *   1 - create /rtest.txt, reopen it for reading and keep it open for
 *       5 seconds
 *   2 - create /rtest.new and move it over /rtest.txt, retrying every
 *       500 ms while the move fails with ERROR_ACCESS_DENIED
 *   3 - unlink /rtest.txt, retrying every 500 ms while unlink fails
 *       with EACCES
 *
 * Progress messages go to stderr.  Returns 0 on success; every error
 * path exits with status 1 through halt().
 */
int
main(int argc, char* argv[])
{
    FILE *fd;
    int mode;

    if (argc != 2)
        halt("Arg must be '1' (test), '2' (rename), or '3' (unlink)\n"
             "Run '1' first, then less than 5 seconds later, run\n"
             "'2' to test rename, or '3' to test unlink.\n");

    /* Parse the mode once; non-numeric input yields 0 and is rejected below. */
    mode = atoi(argv[1]);

    if (mode == 1)
    {
        if ((fd = fopen("/rtest.txt", "w")) == NULL)
            halt("Can not create file\n");
        fclose(fd);
        if ((fd = fopen("/rtest.txt", "r")) == NULL)
            halt("Can not open file\n");
        /* Hold the file open so a concurrent mode 2/3 run sees it in use. */
        Sleep(5000);
        fclose(fd);
    }
    else if (mode == 2)
    {
        /* Remove any leftover from a previous run; failure is expected. */
        unlink("/rtest.new");
        if ((fd = fopen("/rtest.new", "w")) == NULL)
            halt("Can not create file\n");
        fclose(fd);
        while (!MoveFileEx("/rtest.new", "/rtest.txt", MOVEFILE_REPLACE_EXISTING))
        {
            if (GetLastError() != ERROR_ACCESS_DENIED)
                halt("Unknown failure\n");
            else
                fprintf(stderr, "move failed\n");
            Sleep(500);
        }
        /* Success is reported without halt(), which would exit with 1. */
        fputs("move successful\n", stderr);
    }
    else if (mode == 3)
    {
        while (unlink("/rtest.txt"))
        {
            if (errno != EACCES)
                halt("Unknown failure\n");
            else
                fprintf(stderr, "unlink failed\n");
            Sleep(500);
        }
        /* Success is reported without halt(), which would exit with 1. */
        fputs("unlink successful\n", stderr);
    }
    else
        halt("invalid arg\n");

    return 0;
}
#endif

Re: rename/unlink handling for Win32

From
Bruce Momjian
Date:
Oh, one more thing.  dirmod.c has Win32 source to show the actual hangs
caused by rename/unlink --- just define TEST_VERSION and compile.  If
someone wants a binary, let me know.

And sorry I posted to hackers rather than patches, where it belongs.

---------------------------------------------------------------------------

Bruce Momjian wrote:
> Here is my approach to the use of rename/unlink on Win32.  The full
> discussion is at:
> 
>     http://momjian.postgresql.org/cgi-bin/pgtodo?win32
> 
> Basically, rename/unlink will fail if the file is opened.  You can move
> the open file to another name, but that then requires open to loop in
> case the file is missing.
> 
> The following patch retries the rename/unlink every 1/10th of a second,
> printing a warning message after 1 second, and printing a completion
> message if a warning message was printed.
> 
> I looked at PeerDirect's and SRA's port, and neither provides a better
> method.  I looked at PeerDirect's and it actually has some conditional
> code for rename.  For example, it has a signal that is sent to all
> backends to inform them to close their open WAL files.  I am not sure if
> that is required for us because there is some replication stuff in
> there that we aren't using. Jan?  In a few other places, it allows the
> rename to fail.
> 
> I am inclined to implement it as shown, then see what delayed
> rename/unlinks we get in testing.

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073
 



Re: rename/unlink handling for Win32

From
Peter Eisentraut
Date:
Bruce Momjian writes:

> The following patch retries the rename/unlink every 1/10th of a second,
> printing a warning message after 1 second, and printing a completion
> message if a warning message was printed.

I don't like that; it seems arbitrary.  How does the need to wait relate
to other factors, such as the system load?

About the code:  The code you placed into pg_config_manual.h must go into
some other header file, probably a separate one that parallels the .c
file.  Also, I would prefer if the C files in src/port were named after
the function they implement, so rename.c.

It might also be cleaner if we changed the code to use remove() instead of
unlink(), since the ISO C standard uses the former whereas the latter is
Unix-ish.

-- 
Peter Eisentraut   peter_e@gmx.net