From ea19cd4953c45adb235af33e173933c0fb0f5730 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Sat, 25 Oct 2025 18:04:06 +1300 Subject: [PATCH] Allow UTF-8 locales to use strcoll_l() on Windows. On Windows we allow locales with non-UTF-8 encodings in UTF-8 databases for historical reasons, and convert to wchar_t when collating strings. Allow plain strcoll_l() to be reached instead of wcscoll_l() when the locale itself uses UTF-8. XXX Does this work as expected? XXX How is the performance? It might be converting to wchar_t internally anyway, depending on whether it can work incrementally like ICU. --- src/backend/utils/adt/pg_locale_libc.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c index f56b5dbdd37..a075ab26893 100644 --- a/src/backend/utils/adt/pg_locale_libc.c +++ b/src/backend/utils/adt/pg_locale_libc.c @@ -722,7 +722,8 @@ create_pg_locale_libc(Oid collid, MemoryContext context) if (!result->collate_is_c) { #ifdef WIN32 - if (GetDatabaseEncoding() == PG_UTF8) + if (GetDatabaseEncoding() == PG_UTF8 && + pg_get_encoding_from_locale(collate, true) != PG_UTF8) result->collate = &collate_methods_libc_win32_utf8; else #endif @@ -975,8 +976,13 @@ get_collation_actual_version_libc(const char *collcollate) /* * strncoll_libc_win32_utf8 * - * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and - * invoke wcscoll_l(). + * Historical versions of Windows didn't have UTF-8 locales. To support UTF-8 + * databases, we allowed *any* locale to be used in UTF-8 databases (see + * check_locale_encoding()). This function supports mismatched encodings by + * converting strings to wchar_t on the fly and calling wcscoll_l(). + * + * This is not called for UTF-8 locales in UTF-8 databases, but is still needed + * as long as we tolerate mismatches. * * An input string length of -1 means that it's NUL-terminated. */ -- 2.51.1