From dc726a61aace86bda62687e3aa1411753ba3f1a4 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Tue, 19 Jul 2022 06:31:17 +1200 Subject: [PATCH v5 2/2] Default to IETF BCP 47 locale names in initdb on Windows. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid selecting traditional Windows locale names written with English words, because (1) they are unstable and explicitly not recommended for use in databases and (2) they may contain non-ASCII characters, which we can't put in our shared catalogs. Since setlocale() returns such names, on Windows use GetUserDefaultLocaleName() if the user didn't provide an explicit locale. It returns BCP 47 strings like "en-US". Also update the documentation to recommend BCP 47 over the traditional names when providing explicit values to initdb. Reviewed-by: Juan José Santamaría Flecha Reviewed-by: Discussion: https://postgr.es/m/CA%2BhUKGJ%3DXThErgAQRoqfCy1bKPxXVuF0%3D2zDbB%2BSxDs59pv7Fw%40mail.gmail.com --- doc/src/sgml/charset.sgml | 13 +++++++++++-- src/bin/initdb/initdb.c | 31 +++++++++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index 834cb30c85a..adb21eb0799 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -83,8 +83,17 @@ initdb --locale=sv_SE system under what names depends on what was provided by the operating system vendor and what was installed. On most Unix systems, the command locale -a will provide a list of available locales. - Windows uses more verbose locale names, such as German_Germany - or Swedish_Sweden.1252, but the principles are the same. + + + + Windows uses BCP 47 language tags, like ICU. + For example, sv-SE represents Swedish as spoken in Sweden. 
+ Windows also supports more verbose locale names based on full English language and country names, such as German_Germany or Swedish_Sweden.1252, but these are not recommended because they are not stable across operating system updates due to changes in geographical names, and may contain non-ASCII characters, which are not supported in PostgreSQL's shared catalogs. diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index f00718a0150..393232b6cec 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -64,6 +64,10 @@ #include "sys/mman.h" #endif +#ifdef WIN32 +#include +#endif + #include "access/xlog_internal.h" #include "catalog/pg_authid_d.h" #include "catalog/pg_class_d.h" /* pgrminclude ignore */ @@ -2132,6 +2136,7 @@ locale_date_order(const char *locale) static void check_locale_name(int category, const char *locale, char **canonname) { + char *locale_copy; char *save; char *res; @@ -2147,10 +2152,30 @@ check_locale_name(int category, const char *locale, char **canonname) /* for setlocale() call */ if (!locale) - locale = ""; + { +#ifdef WIN32 + wchar_t wide_name[LOCALE_NAME_MAX_LENGTH]; + char name[LOCALE_NAME_MAX_LENGTH]; + + /* use Windows API to find the default in BCP47 format */ + if (GetUserDefaultLocaleName(wide_name, LOCALE_NAME_MAX_LENGTH) == 0) + pg_fatal("failed to get default locale name: error code %lu", + GetLastError()); + if (WideCharToMultiByte(CP_ACP, 0, wide_name, -1, name, + LOCALE_NAME_MAX_LENGTH, NULL, NULL) == 0) + pg_fatal("failed to convert locale name: error code %lu", + GetLastError()); + locale_copy = pg_strdup(name); +#else + /* use environment to find the default */ + locale_copy = pg_strdup(""); +#endif + } + else + locale_copy = pg_strdup(locale); /* set the locale with setlocale, to see if it accepts it. */ - res = setlocale(category, locale); + res = setlocale(category, locale_copy); /* save canonical name if requested.
*/ if (res && canonname) @@ -2183,6 +2208,8 @@ check_locale_name(int category, const char *locale, char **canonname) pg_fatal("invalid locale settings; check LANG and LC_* environment variables"); } } + + free(locale_copy); } /* -- 2.45.2