Re: BUG #18735: Specific multibyte character in psql file path command parameter for Windows - Mailing list pgsql-bugs
From | Tom Lane |
---|---|
Subject | Re: BUG #18735: Specific multibyte character in psql file path command parameter for Windows |
Date | |
Msg-id | 3768611.1733698512@sss.pgh.pa.us Whole thread Raw |
In response to | Re: BUG #18735: Specific multibyte character in psql file path command parameter for Windows (Tatsuo Ishii <ishii@postgresql.org>) |
List | pgsql-bugs |
Tatsuo Ishii <ishii@postgresql.org> writes: >> Another way, given that we only really need this to work for >> SJIS, is to hard-wire the logic into path.c --- it's not like >> pg_sjis_mblen is either long or likely to change. That's >> ugly but would be a lot less invasive and safer to back-patch. >> I'm leaning a bit to the second way, mainly because of the >> extern-relocation annoyance. > +1. I believe the logic of detecting byte length in Shift-JIS will not > be changed. OK. I know I said I wasn't going to write this, but here's a draft patch that assumes we need only touch psql and can rely on its idea of client_encoding. I'm not in a position to test this. regards, tom lane diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c index 1f3cbb11f7..bffe614f00 100644 --- a/src/bin/psql/command.c +++ b/src/bin/psql/command.c @@ -1201,7 +1201,7 @@ exec_command_edit(PsqlScanState scan_state, bool active_branch, expand_tilde(&fname); if (fname) { - canonicalize_path(fname); + canonicalize_path_enc(fname, pset.encoding); /* Always clear buffer if the file isn't modified */ discard_on_quit = true; } @@ -2819,7 +2819,7 @@ exec_command_write(PsqlScanState scan_state, bool active_branch, } else { - canonicalize_path(fname); + canonicalize_path_enc(fname, pset.encoding); fd = fopen(fname, "w"); } if (!fd) @@ -4423,7 +4423,7 @@ process_file(char *filename, bool use_relative_path) } else if (strcmp(filename, "-") != 0) { - canonicalize_path(filename); + canonicalize_path_enc(filename, pset.encoding); /* * If we were asked to resolve the pathname relative to the location @@ -4437,7 +4437,7 @@ process_file(char *filename, bool use_relative_path) strlcpy(relpath, pset.inputfile, sizeof(relpath)); get_parent_directory(relpath); join_path_components(relpath, relpath, filename); - canonicalize_path(relpath); + canonicalize_path_enc(relpath, pset.encoding); filename = relpath; } diff --git a/src/bin/psql/copy.c b/src/bin/psql/copy.c index e020e4d665..1cfe31f164 100644 --- a/src/bin/psql/copy.c +++ b/src/bin/psql/copy.c @@ -280,7 +280,7 @@ do_copy(const char *args) /* prepare to read or write the target file */ if (options->file && !options->program) - canonicalize_path(options->file); + canonicalize_path_enc(options->file, pset.encoding); if (options->from) { diff --git a/src/include/port.h b/src/include/port.h index ba9ab0d34f..1665ac0258 100644 --- a/src/include/port.h +++ b/src/include/port.h @@ -53,6 +53,7 @@ extern char *first_path_var_separator(const char *pathlist); extern void join_path_components(char *ret_path, const char *head, const char *tail); extern void canonicalize_path(char *path); +extern void canonicalize_path_enc(char *path, int encoding); extern void make_native_path(char *filename); extern void cleanup_path(char *path); extern bool path_contains_parent_reference(const char *path); diff --git a/src/port/path.c b/src/port/path.c index de4df6cd78..02ab3188db 100644 --- a/src/port/path.c +++ b/src/port/path.c @@ -36,6 +36,7 @@ #include <unistd.h> #endif +#include "mb/pg_wchar.h" #include "pg_config_paths.h" @@ -45,6 +46,10 @@ #define IS_PATH_VAR_SEP(ch) ((ch) == ';') #endif +#ifdef WIN32 +static void debackslash_path(char *path, int encoding); +static int pg_sjis_mblen(const unsigned char *s); +#endif static void make_relative_path(char *ret_path, const char *target_path, const char *bin_path, const char *my_exec_path); static char *trim_directory(char *path); @@ -149,10 +154,73 @@ last_dir_separator(const char *filename) } +#ifdef WIN32 + +/* + * Convert '\' to '/' within the given path, assuming the path + * is in the specified encoding. + */ +static void +debackslash_path(char *path, int encoding) +{ + char *p; + + /* + * Of the supported encodings, only Shift-JIS has multibyte characters + * that can include a byte equal to '\' (0x5C). So rather than implement + * a fully encoding-aware conversion, we special-case SJIS. (Invoking the + * general encoding-aware logic in wchar.c is impractical here for + * assorted reasons.) + */ + if (encoding == PG_SJIS) + { + for (p = path; *p; p += pg_sjis_mblen((const unsigned char *) p)) + { + if (*p == '\\') + *p = '/'; + } + } + else + { + for (p = path; *p; p++) + { + if (*p == '\\') + *p = '/'; + } + } +} + /* - * make_native_path - on WIN32, change / to \ in the path + * SJIS character length * - * This effectively undoes canonicalize_path. + * This must match the behavior of + * pg_encoding_mblen_bounded(PG_SJIS, s) + * In particular, unlike the version of pg_sjis_mblen in src/common/wchar.c, + * do not allow caller to accidentally step past end-of-string. + */ +static int +pg_sjis_mblen(const unsigned char *s) +{ + int len; + + if (*s >= 0xa1 && *s <= 0xdf) + len = 1; /* 1 byte kana? */ + else if (IS_HIGHBIT_SET(*s) && s[1] != '\0') + len = 2; /* kanji? */ + else + len = 1; /* should be ASCII */ + return len; +} + +#endif /* WIN32 */ + + +/* + * make_native_path - on WIN32, change '/' to '\' in the path + * + * This reverses the '\'-to-'/' transformation of debackslash_path. + * We need not worry about encodings here, since '/' does not appear + * as a byte of a multibyte character in any supported encoding. * * This is required because WIN32 COPY is an internal CMD.EXE * command and doesn't process forward slashes in the same way @@ -182,13 +250,14 @@ make_native_path(char *filename) * on Windows. We need them to use filenames without spaces, for which a * short filename is the safest equivalent, eg: * C:/Progra~1/ + * + * Presently, this is only used on paths that we can assume are in a + * backend-safe encoding, so there's no need for an encoding-aware variant. */ void cleanup_path(char *path) { #ifdef WIN32 - char *ptr; - /* * GetShortPathName() will fail if the path does not exist, or short names * are disabled on this file system. In both cases, we just return the @@ -198,11 +267,8 @@ cleanup_path(char *path) GetShortPathName(path, path, MAXPGPATH - 1); /* Replace '\' with '/' */ - for (ptr = path; *ptr; ptr++) - { - if (*ptr == '\\') - *ptr = '/'; - } + /* All server-safe encodings are alike here, so just use PG_SQL_ASCII */ + debackslash_path(path, PG_SQL_ASCII); #endif } @@ -253,6 +319,8 @@ typedef enum } canonicalize_state; /* + * canonicalize_path() + * * Clean up path by: * o make Win32 path use Unix slashes * o remove trailing quote on Win32 @@ -260,9 +328,20 @@ typedef enum * o remove duplicate (adjacent) separators * o remove '.' (unless path reduces to only '.') * o process '..' ourselves, removing it if possible + * Modifies path in-place. + * + * This comes in two variants: encoding-aware and not. The non-aware version + * is only safe to use on strings that are in a server-safe encoding. */ void canonicalize_path(char *path) +{ + /* All server-safe encodings are alike here, so just use PG_SQL_ASCII */ + canonicalize_path_enc(path, PG_SQL_ASCII); +} + +void +canonicalize_path_enc(char *path, int encoding) { char *p, *to_p; @@ -278,17 +357,15 @@ canonicalize_path(char *path) /* * The Windows command processor will accept suitably quoted paths with * forward slashes, but barfs badly with mixed forward and back slashes. + * Hence, start by converting all back slashes to forward slashes. */ - for (p = path; *p; p++) - { - if (*p == '\\') - *p = '/'; - } + debackslash_path(path, encoding); /* * In Win32, if you do: prog.exe "a b" "\c\d\" the system will pass \c\d" * as argv[2], so trim off trailing quote. */ + p = path + strlen(path); if (p > path && *(p - 1) == '"') *(p - 1) = '/'; #endif
pgsql-bugs by date: