Re: Can we avoid chdir'ing in resolve_symlinks() ? - Mailing list pgsql-hackers

From Tom Lane
Subject Re: Can we avoid chdir'ing in resolve_symlinks() ?
Date
Msg-id 1568831.1662259313@sss.pgh.pa.us
Whole thread Raw
In response to Re: Can we avoid chdir'ing in resolve_symlinks() ?  (Tom Lane <tgl@sss.pgh.pa.us>)
Responses Re: Can we avoid chdir'ing in resolve_symlinks() ?
List pgsql-hackers
Here's a draft patch for this.  It seems to work on Linux,
but the Windows code is just speculation.  In particular,
I did

    path = _fullpath(NULL, fname, 0);
    if (path == NULL)
        _dosmaperr(GetLastError());

but I'm not really sure that the _dosmaperr bit is needed,
because the _fullpath man page I found makes reference to
setting "errno" [1].  It's likely to be hard to test, because
most of the possible error cases should be nigh unreachable
in our usage; we already know the input is a valid reference
to an executable file.

BTW, I noticed what seems a flat-out bug in validate_exec:

        /* Win32 requires a .exe suffix for stat() */
-       if (strlen(path) >= strlen(".exe") &&
+       if (strlen(path) < strlen(".exe") ||
                pg_strcasecmp(path + strlen(path) - strlen(".exe"), ".exe") != 0)

Nobody's noticed because none of our executables have base names
shorter than 4 characters, but it's still a bug.

            regards, tom lane

[1] https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/fullpath-wfullpath?view=msvc-170

diff --git a/src/common/exec.c b/src/common/exec.c
index 22f04aafbe..fa01ff7b32 100644
--- a/src/common/exec.c
+++ b/src/common/exec.c
@@ -14,6 +14,15 @@
  *-------------------------------------------------------------------------
  */

+/*
+ * On macOS, "man realpath" avers:
+ *    Defining _DARWIN_C_SOURCE or _DARWIN_BETTER_REALPATH before including
+ *    stdlib.h will cause the provided implementation of realpath() to use
+ *    F_GETPATH from fcntl(2) to discover the path.
+ * This should be harmless everywhere else.
+ */
+#define _DARWIN_BETTER_REALPATH
+
 #ifndef FRONTEND
 #include "postgres.h"
 #else
@@ -58,11 +67,8 @@ extern int    _CRT_glob = 0;        /* 0 turns off globbing; 1 turns it on */
     (fprintf(stderr, __VA_ARGS__), fputc('\n', stderr))
 #endif

-#ifdef _MSC_VER
-#define getcwd(cwd,len)  GetCurrentDirectory(len, cwd)
-#endif
-
-static int    resolve_symlinks(char *path);
+static int    normalize_exec_path(char *path);
+static char *pg_realpath(const char *fname);

 #ifdef WIN32
 static BOOL GetTokenUser(HANDLE hToken, PTOKEN_USER *ppTokenUser);
@@ -87,7 +93,7 @@ validate_exec(const char *path)
     char        path_exe[MAXPGPATH + sizeof(".exe") - 1];

     /* Win32 requires a .exe suffix for stat() */
-    if (strlen(path) >= strlen(".exe") &&
+    if (strlen(path) < strlen(".exe") ||
         pg_strcasecmp(path + strlen(path) - strlen(".exe"), ".exe") != 0)
     {
         strlcpy(path_exe, path, sizeof(path_exe) - 4);
@@ -151,30 +157,16 @@ validate_exec(const char *path)
 int
 find_my_exec(const char *argv0, char *retpath)
 {
-    char        cwd[MAXPGPATH],
-                test_path[MAXPGPATH];
     char       *path;

-    if (!getcwd(cwd, MAXPGPATH))
-    {
-        log_error(errcode_for_file_access(),
-                  _("could not identify current directory: %m"));
-        return -1;
-    }
-
     /*
      * If argv0 contains a separator, then PATH wasn't used.
      */
-    if (first_dir_separator(argv0) != NULL)
+    strlcpy(retpath, argv0, MAXPGPATH);
+    if (first_dir_separator(retpath) != NULL)
     {
-        if (is_absolute_path(argv0))
-            strlcpy(retpath, argv0, MAXPGPATH);
-        else
-            join_path_components(retpath, cwd, argv0);
-        canonicalize_path(retpath);
-
         if (validate_exec(retpath) == 0)
-            return resolve_symlinks(retpath);
+            return normalize_exec_path(retpath);

         log_error(errcode(ERRCODE_WRONG_OBJECT_TYPE),
                   _("invalid binary \"%s\": %m"), retpath);
@@ -183,9 +175,8 @@ find_my_exec(const char *argv0, char *retpath)

 #ifdef WIN32
     /* Win32 checks the current directory first for names without slashes */
-    join_path_components(retpath, cwd, argv0);
     if (validate_exec(retpath) == 0)
-        return resolve_symlinks(retpath);
+        return normalize_exec_path(retpath);
 #endif

     /*
@@ -208,21 +199,15 @@ find_my_exec(const char *argv0, char *retpath)
             if (!endp)
                 endp = startp + strlen(startp); /* point to end */

-            strlcpy(test_path, startp, Min(endp - startp + 1, MAXPGPATH));
+            strlcpy(retpath, startp, Min(endp - startp + 1, MAXPGPATH));

-            if (is_absolute_path(test_path))
-                join_path_components(retpath, test_path, argv0);
-            else
-            {
-                join_path_components(retpath, cwd, test_path);
-                join_path_components(retpath, retpath, argv0);
-            }
+            join_path_components(retpath, retpath, argv0);
             canonicalize_path(retpath);

             switch (validate_exec(retpath))
             {
                 case 0:            /* found ok */
-                    return resolve_symlinks(retpath);
+                    return normalize_exec_path(retpath);
                 case -1:        /* wasn't even a candidate, keep looking */
                     break;
                 case -2:        /* found but disqualified */
@@ -241,105 +226,90 @@ find_my_exec(const char *argv0, char *retpath)


 /*
- * resolve_symlinks - resolve symlinks to the underlying file
+ * normalize_exec_path - resolve symlinks and convert to absolute path
  *
- * Replace "path" by the absolute path to the referenced file.
+ * Given a path that refers to an executable, chase through any symlinks
+ * to find the real file location; then convert that to an absolute path.
  *
+ * On success, replaces the contents of "path" with the absolute path.
+ * ("path" is assumed to be of size MAXPGPATH.)
  * Returns 0 if OK, -1 if error.
- *
- * Note: we are not particularly tense about producing nice error messages
- * because we are not really expecting error here; we just determined that
- * the symlink does point to a valid executable.
- *
- * Here we test HAVE_READLINK, which excludes Windows.  There's no point in
- * using our junction point-based replacement code for this, because that only
- * works for directories.
  */
 static int
-resolve_symlinks(char *path)
+normalize_exec_path(char *path)
 {
-#ifdef HAVE_READLINK
-    struct stat buf;
-    char        orig_wd[MAXPGPATH],
-                link_buf[MAXPGPATH];
-    char       *fname;
-
     /*
-     * To resolve a symlink properly, we have to chdir into its directory and
-     * then chdir to where the symlink points; otherwise we may fail to
-     * resolve relative links correctly (consider cases involving mount
-     * points, for example).  After following the final symlink, we use
-     * getcwd() to figure out where the heck we're at.
-     *
-     * One might think we could skip all this if path doesn't point to a
-     * symlink to start with, but that's wrong.  We also want to get rid of
-     * any directory symlinks that are present in the given path. We expect
-     * getcwd() to give us an accurate, symlink-free path.
+     * We used to do a lot of work ourselves here, but now we just let
+     * realpath(3) do all the heavy lifting.
      */
-    if (!getcwd(orig_wd, MAXPGPATH))
+    char       *abspath = pg_realpath(path);
+
+    if (abspath == NULL)
     {
         log_error(errcode_for_file_access(),
-                  _("could not identify current directory: %m"));
+                  _("could not resolve path \"%s\" to absolute form: %m"),
+                  path);
         return -1;
     }
+    strlcpy(path, abspath, MAXPGPATH);
+    free(abspath);

-    for (;;)
-    {
-        char       *lsep;
-        int            rllen;
-
-        lsep = last_dir_separator(path);
-        if (lsep)
-        {
-            *lsep = '\0';
-            if (chdir(path) == -1)
-            {
-                log_error(errcode_for_file_access(),
-                          _("could not change directory to \"%s\": %m"), path);
-                return -1;
-            }
-            fname = lsep + 1;
-        }
-        else
-            fname = path;
+#ifdef WIN32
+    /* On Windows, be sure to convert '\' to '/' */
+    canonicalize_path(path);
+#endif

-        if (lstat(fname, &buf) < 0 ||
-            !S_ISLNK(buf.st_mode))
-            break;
+    return 0;
+}

-        errno = 0;
-        rllen = readlink(fname, link_buf, sizeof(link_buf));
-        if (rllen < 0 || rllen >= sizeof(link_buf))
-        {
-            log_error(errcode_for_file_access(),
-                      _("could not read symbolic link \"%s\": %m"), fname);
-            return -1;
-        }
-        link_buf[rllen] = '\0';
-        strcpy(path, link_buf);
-    }

-    /* must copy final component out of 'path' temporarily */
-    strlcpy(link_buf, fname, sizeof(link_buf));
+/*
+ * pg_realpath() - realpath(3) with POSIX.1-2008 semantics
+ *
+ * This is equivalent to realpath(fname, NULL), in that it returns a
+ * malloc'd buffer containing the absolute path equivalent to fname.
+ * On error, returns NULL with errno set.
+ *
+ * On Windows, what you get is spelled per platform conventions,
+ * so you probably want to apply canonicalize_path() to the result.
+ *
+ * For now, this is needed only here so mark it static.  If you choose to
+ * move it into its own file, move the _DARWIN_BETTER_REALPATH #define too!
+ */
+static char *
+pg_realpath(const char *fname)
+{
+    char       *path;

-    if (!getcwd(path, MAXPGPATH))
+#ifndef WIN32
+    path = realpath(fname, NULL);
+    if (path == NULL && errno == EINVAL)
     {
-        log_error(errcode_for_file_access(),
-                  _("could not identify current directory: %m"));
-        return -1;
-    }
-    join_path_components(path, path, link_buf);
-    canonicalize_path(path);
+        /*
+         * Cope with old-POSIX systems that require a user-provided buffer.
+         * Assume MAXPGPATH is enough room on all such systems.
+         */
+        char       *buf = malloc(MAXPGPATH);

-    if (chdir(orig_wd) == -1)
-    {
-        log_error(errcode_for_file_access(),
-                  _("could not change directory to \"%s\": %m"), orig_wd);
-        return -1;
+        if (buf == NULL)
+            return NULL;        /* assume errno is set */
+        path = realpath(fname, buf);
+        if (path == NULL)        /* don't leak memory */
+        {
+            int            save_errno = errno;
+
+            free(buf);
+            errno = save_errno;
+        }
     }
-#endif                            /* HAVE_READLINK */
+#else                            /* WIN32 */
+    /* Microsoft is resolutely non-POSIX, but _fullpath does the same thing */
+    path = _fullpath(NULL, fname, 0);
+    if (path == NULL)
+        _dosmaperr(GetLastError());
+#endif

-    return 0;
+    return path;
 }



pgsql-hackers by date:

Previous
From: John Naylor
Date:
Subject: Re: warning: comparison of integer expressions of different signedness related to simd.h
Next
From: Reid Thompson
Date:
Subject: Re: Add tracking of backend memory allocated to pg_stat_activity